Skip to content

Commit

Permalink
assembleContigs fixes (#375)
Browse files Browse the repository at this point in the history
* This fixes bug 1 from #372

* Fixes IllegalArgumentException due to positionMaps[i].get(positionMaps[i].size() - 1) != ranges[i].getTo() - 1. Add clone id information to exceptions in FullSeqAssembler

* Fixes bug 2 form #372

* Fixes behaviour with empty targets in FullSeqAssembler

* Minor fixes.

* New PrimitivIO

* Test simplification.

* Serialization fix.

* Magic bytes for clna / clns / vdjca formats incremented. All backward compatibility dropped.
  • Loading branch information
dbolotin committed May 5, 2018
1 parent 6d903cb commit 3cffa58
Show file tree
Hide file tree
Showing 24 changed files with 95 additions and 142 deletions.
2 changes: 1 addition & 1 deletion repseqio
Original file line number Diff line number Diff line change
Expand Up @@ -391,7 +391,15 @@ BranchSequences assembleBranchSequences(int[] points, VariantBranch branch) {
if (currentPosition != nextPosition - 1) {
sequences.add(sequenceBuilder.createAndDestroy());
positionMaps.add(positionMap);
ranges.add(new Range(blockStartPosition, currentPosition + 1));

// Naive:
// ranges.add(new Range(blockStartPosition, currentPosition + 1));
// Eliminate edge deletions:
ranges.add(
positionMap.isEmpty()
? new Range(blockStartPosition, currentPosition + 1)
: new Range(positionMap.get(0), positionMap.get(positionMap.size() - 1) + 1));

sequenceBuilder = new NSequenceWithQualityBuilder();
positionMap = new TIntArrayList();
blockStartPosition = nextPosition;
Expand Down Expand Up @@ -463,6 +471,8 @@ boolean check() {
if (ranges.length != positionMaps.length && ranges.length != sequences.length)
throw new IllegalArgumentException();
for (int i = 0; i < ranges.length; i++) {
if (positionMaps[i].isEmpty())
continue;
if (positionMaps[i].get(0) != ranges[i].getFrom() || positionMaps[i].get(positionMaps[i].size() - 1) != ranges[i].getTo() - 1)
throw new IllegalArgumentException();
if (positionMaps[i].size() != sequences[i].size())
Expand Down Expand Up @@ -570,6 +580,10 @@ private Clone buildClone(double count, BranchSequences targets) {
// ...V - VVVV - V+CDR3+J - J...

if (range.getTo() < N_LEFT_DUMMIES + lengthV) {
if (range.getTo() <= N_LEFT_DUMMIES)
// Target outside V region (to the left of V region)
continue;

boolean floatingLeftBound =
!parameters.alignedRegionsOnly
&& i == 0
Expand Down Expand Up @@ -670,6 +684,10 @@ else if (jFloatingRightBound)
jOffset, range.getTo() - rightAssemblingFeatureBound,
targets.assemblingFeatureOffset + assemblingFeatureLength, sequence.size() - (targets.assemblingFeatureOffset + assemblingFeatureLength));
} else if (range.getFrom() > rightAssemblingFeatureBound) {
if (range.getFrom() >= rightAssemblingFeatureBound + jLength)
// Target outside J region (to the right of J region)
continue;

boolean floatingRightBound =
!parameters.alignedRegionsOnly
&& i == targets.ranges.length - 1
Expand Down Expand Up @@ -982,6 +1000,7 @@ public RawVariantsData calculateRawData(Supplier<OutputPort<VDJCAlignments>> ali
// Collecting coverage and VariantAggregators
int nAlignments = 0;
for (VDJCAlignments al : CUtils.it(alignments.get())) {
al = al.updateAlignments(AlignmentUtils::shiftIndelsAtHomopolymers);
++nAlignments;
for (PointSequence point : toPointSequences(al)) {
int seqIndex = getVariantIndex(point.sequence.getSequence());
Expand Down Expand Up @@ -1030,6 +1049,7 @@ public RawVariantsData calculateRawData(Supplier<OutputPort<VDJCAlignments>> ali
// Main data collection loop
i = 0;
for (VDJCAlignments al : CUtils.it(alignments.get())) {
al = al.updateAlignments(AlignmentUtils::shiftIndelsAtHomopolymers);
for (PointSequence point : toPointSequences(al)) {
int pointIndex = revIndex.get(point.point);
packedData[pointIndex][i] =
Expand Down Expand Up @@ -1285,7 +1305,7 @@ void toPointSequencesByAlignments(List<PointSequence> points,
// if (seq2Range.length() == 0)
// return;

alignment = AlignmentUtils.shiftIndelsAtHomopolymers(alignment);
// alignment = AlignmentUtils.shiftIndelsAtHomopolymers(alignment);

Range
alSeq2Range = alignment.getSequence2Range(),
Expand Down
11 changes: 5 additions & 6 deletions src/main/java/com/milaboratory/mixcr/basictypes/ClnAWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@
import cc.redberry.pipe.OutputPort;
import cc.redberry.pipe.OutputPortCloseable;
import cc.redberry.pipe.util.CountingOutputPort;
import com.milaboratory.mixcr.assembler.CloneAssemblerParameters;
import com.milaboratory.mixcr.util.MiXCRVersionInfo;
import com.milaboratory.primitivio.PipeDataInputReader;
import com.milaboratory.primitivio.PrimitivI;
Expand All @@ -59,8 +58,8 @@
* writeAlignmentsAndIndex() 5. close()
*/
public final class ClnAWriter implements AutoCloseable, CanReportProgressAndStage {
static final String MAGIC_V1 = "MiXCR.CLNA.V01";
static final String MAGIC = MAGIC_V1;
static final String MAGIC_V2 = "MiXCR.CLNA.V02";
static final String MAGIC = MAGIC_V2;
static final int MAGIC_LENGTH = MAGIC.length();

/**
Expand Down Expand Up @@ -175,8 +174,9 @@ public synchronized void sortAlignments(OutputPort<VDJCAlignments> alignments,

// Sorting alignments by cloneId and then by mapping type (core alignments will be written before all others)
// and saving sorting output port
sortedAlignments = Sorter.sort(
toSorter = new CountingOutputPort<>(alignments),
this.toSorter = new CountingOutputPort<>(alignments);
this.sortedAlignments = Sorter.sort(
toSorter,
(o1, o2) -> {
int i = Integer.compare(o1.cloneIndex, o2.cloneIndex);
if (i != 0)
Expand Down Expand Up @@ -309,7 +309,6 @@ else if (sortedAlignments == null) {
return 1.0 * toSorter.getCount() / numberOfAlignments;
} else
return 1.0 * numberOfAlignmentsWritten / numberOfAlignments;

}

@Override
Expand Down
24 changes: 5 additions & 19 deletions src/main/java/com/milaboratory/mixcr/basictypes/CloneSetIO.java
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@
import com.milaboratory.mixcr.vdjaligners.VDJCAlignerParameters;
import com.milaboratory.primitivio.PrimitivI;
import com.milaboratory.primitivio.PrimitivO;
import com.milaboratory.primitivio.SerializersManager;
import com.milaboratory.util.CanReportProgressAndStage;
import io.repseq.core.*;

Expand All @@ -44,9 +43,8 @@
import java.util.List;

public final class CloneSetIO {
static final String MAGIC_V5 = "MiXCR.CLNS.V05";
static final String MAGIC_V6 = "MiXCR.CLNS.V06";
static final String MAGIC = MAGIC_V6;
static final String MAGIC_V7 = "MiXCR.CLNS.V07";
static final String MAGIC = MAGIC_V7;
static final int MAGIC_LENGTH = 14;
static final byte[] MAGIC_BYTES = MAGIC.getBytes(StandardCharsets.US_ASCII);

Expand Down Expand Up @@ -179,12 +177,9 @@ public static CloneSet readClns(InputStream inputStream, VDJCLibraryRegistry lib

String magicString = new String(magicBytes);

SerializersManager serializersManager = input.getSerializersManager();
// SerializersManager serializersManager = input.getSerializersManager();

switch (magicString) {
case MAGIC_V5:
serializersManager.registerCustomSerializer(Clone.class, new CompatibilityIO.CloneSerializerV5());
break;
case MAGIC:
break;
default:
Expand All @@ -194,19 +189,10 @@ public static CloneSet readClns(InputStream inputStream, VDJCLibraryRegistry lib

String versionInfo = input.readUTF();

if (!magicString.equals(MAGIC))
// Dropping this field for v5 files
input.readObject(GeneFeature[].class);

VDJCAlignerParameters alignerParameters;
CloneAssemblerParameters assemblerParameters;
if (magicString.equals(MAGIC)) {
alignerParameters = input.readObject(VDJCAlignerParameters.class);
assemblerParameters = input.readObject(CloneAssemblerParameters.class);
} else {
alignerParameters = null;
assemblerParameters = null;
}
alignerParameters = input.readObject(VDJCAlignerParameters.class);
assemblerParameters = input.readObject(CloneAssemblerParameters.class);

EnumMap<GeneType, GeneFeature> alignedFeatures = IO.readGF2GTMap(input);
List<VDJCGene> genes = IOUtil.readAndRegisterGeneReferences(input, libraryRegistry, new GT2GFAdapter(alignedFeatures));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,37 +40,4 @@
public final class CompatibilityIO {
private CompatibilityIO() {
}

public static class CloneSerializerV5 implements Serializer<Clone> {
@Override
public void write(PrimitivO output, Clone object) {
throw new UnsupportedOperationException();
}

@Override
public Clone read(PrimitivI input) {
NSequenceWithQuality[] targets = input.readObject(NSequenceWithQuality[].class);
int size = input.readByte();
EnumMap<GeneType, VDJCHit[]> hits = new EnumMap<>(GeneType.class);
for (int i = 0; i < size; i++) {
GeneType key = input.readObject(GeneType.class);
hits.put(key, input.readObject(VDJCHit[].class));
}
double count = input.readLong();
int id = input.readInt();
// Skipping the field
input.readObject(GeneFeature[].class);
return new Clone(targets, hits, count, id);
}

@Override
public boolean isReference() {
return true;
}

@Override
public boolean handlesReference() {
return false;
}
}
}
33 changes: 0 additions & 33 deletions src/main/java/com/milaboratory/mixcr/basictypes/IO.java
Original file line number Diff line number Diff line change
Expand Up @@ -125,39 +125,6 @@ public boolean handlesReference() {
}
}

public static class VDJCAlignmentsSerializer21 implements Serializer<VDJCAlignments> {
@Override
public void write(PrimitivO output, VDJCAlignments object) {
throw new RuntimeException("Backward compatibility reader! Write not implemented.");
}

@Override
public VDJCAlignments read(PrimitivI input) {
NSequenceWithQuality[] targets = input.readObject(NSequenceWithQuality[].class);
input.readObject(String[].class);
input.readObject(NSequenceWithQuality[].class);
input.readObject(String[].class);
int size = input.readByte();
EnumMap<GeneType, VDJCHit[]> hits = new EnumMap<>(GeneType.class);
for (int i = 0; i < size; i++) {
GeneType key = input.readObject(GeneType.class);
hits.put(key, input.readObject(VDJCHit[].class));
}
return new VDJCAlignments(input.readLong(), hits, targets,
new SequenceHistory[0], new SequenceRead[0]);
}

@Override
public boolean isReference() {
return true;
}

@Override
public boolean handlesReference() {
return false;
}
}

public static class CloneSerializer implements Serializer<Clone> {
@Override
public void write(PrimitivO output, Clone object) {
Expand Down
3 changes: 3 additions & 0 deletions src/main/java/com/milaboratory/mixcr/basictypes/IOUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ public static void registerGeneReferences(PrimitivO output, List<VDJCGene> genes
HasFeatureToAlign featuresToAlign) {
// Putting genes references and feature sequences to be serialized/deserialized as references
for (VDJCGene gene : genes) {
// Each gene is singleton
output.putKnownReference(gene);
// Also put sequences of certain gene features of genes as known references if required
if (featuresToAlign != null) {
Expand All @@ -66,6 +67,8 @@ public static void registerGeneReferences(PrimitivO output, List<VDJCGene> genes
NucleotideSequence featureSequence = gene.getFeature(featureToAlign);
if (featureSequence == null)
continue;
// Relies on the fact that sequences of gene features are cached,
// the same instance will be used everywhere (including alignments)
output.putKnownReference(gene.getFeature(featuresToAlign.getFeatureToAlign(gene.getGeneType())));
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
import java.util.Collections;
import java.util.EnumMap;
import java.util.List;
import java.util.function.Function;

@Serializable(by = IO.VDJCAlignmentsSerializer.class)
public final class VDJCAlignments extends VDJCObject {
Expand Down Expand Up @@ -140,6 +141,12 @@ public VDJCAlignments updateCloneIndex(int newCloneIndex) {
mappingType, newCloneIndex);
}

public VDJCAlignments updateAlignments(Function<Alignment<NucleotideSequence>, Alignment<NucleotideSequence>> processor) {
EnumMap<GeneType, VDJCHit[]> newHits = this.hits.clone();
newHits.replaceAll((k, v) -> Arrays.stream(v).map(h -> h.updateAlignments(processor)).toArray(VDJCHit[]::new));
return new VDJCAlignments(alignmentsIndex, newHits, targets, history, originalReads, mappingType, cloneIndex);
}

public VDJCAlignments shiftReadId(long newAlignmentIndex, long shift) {
return new VDJCAlignments(newAlignmentIndex, hits, targets, shift(history, shift), shift(originalReads, shift));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@
import com.milaboratory.core.io.CompressionType;
import com.milaboratory.mixcr.vdjaligners.VDJCAlignerParameters;
import com.milaboratory.primitivio.PrimitivI;
import com.milaboratory.primitivio.SerializersManager;
import com.milaboratory.util.CanReportProgress;
import com.milaboratory.util.CountingInputStream;
import gnu.trove.list.array.TLongArrayList;
Expand Down Expand Up @@ -138,10 +137,8 @@ void init(Map<GeneFeature, GeneFeature> geneFeatureRefs) {
String magicString = new String(magic);
this.magic = magicString;

SerializersManager serializersManager = input.getSerializersManager();
// SerializersManager serializersManager = input.getSerializersManager();
switch (magicString) {
case MAGIC_V9:
serializersManager.registerCustomSerializer(VDJCAlignments.class, new IO.VDJCAlignmentsSerializer21());
case MAGIC:
break;
default:
Expand Down Expand Up @@ -171,7 +168,7 @@ void init(Map<GeneFeature, GeneFeature> geneFeatureRefs) {
// parameters.getGeneAlignerParameters(gt).setGeneFeatureToAlign(featureParams);

if (featureDeserialized != null)
input.putKnownReference(featureParams);
input.putKnownObject(featureParams);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,15 +43,8 @@
import java.util.List;

public final class VDJCAlignmentsWriter implements VDJCAlignmentsWriterI {
static final String MAGIC_V3 = "MiXCR.VDJC.V03";
static final String MAGIC_V4 = "MiXCR.VDJC.V04";
static final String MAGIC_V5 = "MiXCR.VDJC.V05";
static final String MAGIC_V6 = "MiXCR.VDJC.V06";
static final String MAGIC_V7 = "MiXCR.VDJC.V07";
static final String MAGIC_V8 = "MiXCR.VDJC.V08";
static final String MAGIC_V9 = "MiXCR.VDJC.V09";
static final String MAGIC_V10 = "MiXCR.VDJC.V10";
static final String MAGIC = MAGIC_V10;
static final String MAGIC_V11 = "MiXCR.VDJC.V11";
static final String MAGIC = MAGIC_V11;
static final int MAGIC_LENGTH = 14;
static final byte[] MAGIC_BYTES = MAGIC.getBytes(StandardCharsets.US_ASCII);
final PrimitivO output;
Expand Down Expand Up @@ -110,7 +103,7 @@ public void header(VDJCAlignerParameters parameters, List<VDJCGene> genes) {
GeneFeature feature = parameters.getFeatureToAlign(gt);
output.writeObject(feature);
if (feature != null)
output.putKnownReference(feature);
output.putKnownObject(feature);
}

header = true;
Expand Down
11 changes: 11 additions & 0 deletions src/main/java/com/milaboratory/mixcr/basictypes/VDJCHit.java
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
import io.repseq.core.VDJCGene;

import java.util.Arrays;
import java.util.function.Function;

@Serializable(by = IO.VDJCHitSerializer.class)
public final class VDJCHit implements Comparable<VDJCHit>, HasGene {
Expand Down Expand Up @@ -79,6 +80,16 @@ public VDJCHit setAlignment(int target, Alignment<NucleotideSequence> alignment)
return new VDJCHit(gene, newAlignments, alignedFeature);
}

@SuppressWarnings("unchecked")
public VDJCHit updateAlignments(Function<Alignment<NucleotideSequence>, Alignment<NucleotideSequence>> processor) {
return new VDJCHit(gene,
Arrays
.stream(alignments)
.map(al -> al == null ? null : processor.apply(al))
.toArray(Alignment[]::new),
alignedFeature, score);
}

public int getPosition(int target, ReferencePoint referencePoint) {
if (alignments[target] == null)
return -1;
Expand Down

0 comments on commit 3cffa58

Please sign in to comment.