Skip to content

Commit

Permalink
Extend clones (#355)
Browse files Browse the repository at this point in the history
* Fake cloneSet object for streamed clone objects in ClnAReader

* Correct clone sorting in extend action output.

* This fixes bug leading to wrong scoring in extend procedure.

* This fixes translation problem in exportAlignmentsPretty, and several other minor bugs:
  - bug in main class with no command line arguments
  - generalise slice action to vdjca files
  - adds new header initializer to VDJCAWriter

fixes #361

fixes #343
  • Loading branch information
dbolotin committed Mar 29, 2018
1 parent f7d9b8f commit 5a9a3a5
Show file tree
Hide file tree
Showing 26 changed files with 391 additions and 198 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -306,14 +306,14 @@ public void close() {
deferredAlignmentsLogger.close();
}

public CloneSet getCloneSet() {
public CloneSet getCloneSet(VDJCAlignerParameters alignerParameters) {
EnumMap<GeneType, GeneFeature> features = new EnumMap<>(GeneType.class);
for (GeneType geneType : GeneType.values()) {
GeneFeature gf = featuresToAlign.get(geneType);
if (gf != null)
features.put(geneType, gf);
}
return new CloneSet(Arrays.asList(realClones), usedGenes.values(), features, parameters.getAssemblingFeatures());
return new CloneSet(Arrays.asList(realClones), usedGenes.values(), features, alignerParameters, parameters);
}

public OutputPortCloseable<ReadToCloneMapping> getAssembledReadsPort() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -310,13 +310,13 @@ public int hashCode() {
result = 31 * result + (separateByJ ? 1 : 0);
result = 31 * result + (separateByC ? 1 : 0);
temp = Double.doubleToLongBits(maximalPreClusteringRatio);
result = 31 * result + (int) (temp^(temp >>> 32));
result = 31 * result + (int) (temp ^ (temp >>> 32));
result = 31 * result + (addReadsCountOnClustering ? 1 : 0);
result = 31 * result + (int) badQualityThreshold;
temp = Double.doubleToLongBits(maxBadPointsPercent);
result = 31 * result + (int) (temp^(temp >>> 32));
result = 31 * result + (int) (variants^(variants >>> 32));
result = 31 * result + (int) (minimalQuality^(minimalQuality >>> 32));
result = 31 * result + (int) (temp ^ (temp >>> 32));
result = 31 * result + (int) (variants ^ (variants >>> 32));
result = 31 * result + (int) (minimalQuality ^ (minimalQuality >>> 32));
return result;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import cc.redberry.pipe.blocks.FilteringPort;
import com.milaboratory.mixcr.basictypes.CloneSet;
import com.milaboratory.mixcr.basictypes.VDJCAlignments;
import com.milaboratory.mixcr.vdjaligners.VDJCAlignerParameters;
import com.milaboratory.util.CanReportProgress;
import com.milaboratory.util.CanReportProgressAndStage;

Expand Down Expand Up @@ -122,7 +123,7 @@ public void run() {
isFinished = true;
}

public CloneSet getCloneSet() {
return assembler.getCloneSet();
public CloneSet getCloneSet(VDJCAlignerParameters alignerParameters) {
return assembler.getCloneSet(alignerParameters);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ public Clone create(int id, double count,
else
hits.put(GeneType.Diversity, new VDJCHit[0]);

return new Clone(targets, hits, assemblingFeatures, count, id);
return new Clone(targets, hits, count, id);
}

public Clone create(int id, CloneAccumulator accumulator) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ public FullSeqAssembler(CloneFactory cloneFactory,
geneScores.put(gt, scores);
}
this.alignerParameters = alignerParameters;
GeneFeature[] assemblingFeatures = clone.getAssemblingFeatures();
GeneFeature[] assemblingFeatures = clone.getParentCloneSet().getAssemblingFeatures();
if (assemblingFeatures.length != 1)
throw new IllegalArgumentException();

Expand Down Expand Up @@ -586,7 +586,7 @@ else if (floatingRightBound)
tmp = hits.get(Joining);
tmp[0] = substituteAlignments(tmp[0], jTopHitAlignments);

return new Clone(targets.sequences, hits, clone.getAssemblingFeatures(), count, 0);
return new Clone(targets.sequences, hits, count, 0);
}

//fixme write docs
Expand Down Expand Up @@ -893,7 +893,8 @@ public static abstract class RawVariantsData {
// array[readId] = (variantId << 8) | minQuality
abstract OutputPort<int[]> createPort();

void destroy() {}
void destroy() {
}
}

static final class VariantAggregator {
Expand Down
15 changes: 11 additions & 4 deletions src/main/java/com/milaboratory/mixcr/basictypes/ClnAReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
Expand Down Expand Up @@ -81,7 +82,6 @@ public final class ClnAReader implements AutoCloseable {

final VDJCAlignerParameters alignerParameters;
final CloneAssemblerParameters assemblerParameters;
final GeneFeature[] assemblingFeatures;
final CloneSetIO.GT2GFAdapter alignedFeatures;
final List<VDJCGene> genes;
final int numberOfClones;
Expand Down Expand Up @@ -144,7 +144,6 @@ public ClnAReader(Path path, VDJCLibraryRegistry libraryRegistry, int chunkSize)
this.versionInfo = input.readUTF();
this.alignerParameters = input.readObject(VDJCAlignerParameters.class);
this.assemblerParameters = input.readObject(CloneAssemblerParameters.class);
this.assemblingFeatures = input.readObject(GeneFeature[].class);
this.alignedFeatures = new CloneSetIO.GT2GFAdapter(IO.readGF2GTMap(input));
this.genes = IOUtil.readGeneReferences(input, libraryRegistry);
}
Expand All @@ -164,12 +163,15 @@ public VDJCAlignerParameters getAlignerParameters() {
return alignerParameters;
}

/**
* Clone assembler parameters
*/
public CloneAssemblerParameters getAssemblerParameters() {
return assemblerParameters;
}

public GeneFeature[] getAssemblingFeatures() {
return assemblingFeatures;
return assemblerParameters.getAssemblingFeatures();
}

/**
Expand Down Expand Up @@ -222,7 +224,7 @@ public CloneSet readCloneSet() throws IOException {
for (int i = 0; i < count; i++)
clones.add(input.readObject(Clone.class));

return new CloneSet(clones, genes, alignedFeatures.map, assemblingFeatures);
return new CloneSet(clones, genes, alignedFeatures.map, alignerParameters, assemblerParameters);
}

/**
Expand Down Expand Up @@ -286,6 +288,7 @@ public final class CloneAlignmentsPort
implements OutputPort<CloneAlignments>, CanReportProgress {
private final AtomicInteger cloneIndex = new AtomicInteger();
private final AtomicLong processedAlignments = new AtomicLong();
private final CloneSet fakeCloneSet;
private final PipeDataInputReader<Clone> clones;
volatile boolean isFinished = false;

Expand All @@ -294,6 +297,9 @@ public final class CloneAlignmentsPort
PrimitivI input = new PrimitivI(new InputDataStream(firstClonePosition, index[0]));
IOUtil.registerGeneReferences(input, genes, alignedFeatures);
this.clones = new PipeDataInputReader<>(Clone.class, input, numberOfClones);
this.fakeCloneSet = new CloneSet(Collections.EMPTY_LIST,
genes, alignedFeatures.map,
alignerParameters, assemblerParameters);
}

@Override
Expand All @@ -303,6 +309,7 @@ public CloneAlignments take() {
isFinished = true;
return null;
}
clone.setParentCloneSet(fakeCloneSet);
CloneAlignments result = new CloneAlignments(clone, cloneIndex.getAndIncrement());
processedAlignments.addAndGet(result.alignmentsCount);
return result;
Expand Down
16 changes: 5 additions & 11 deletions src/main/java/com/milaboratory/mixcr/basictypes/ClnAWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -34,15 +34,13 @@
import cc.redberry.pipe.util.CountingOutputPort;
import com.milaboratory.mixcr.assembler.CloneAssemblerParameters;
import com.milaboratory.mixcr.util.MiXCRVersionInfo;
import com.milaboratory.mixcr.vdjaligners.VDJCAlignerParameters;
import com.milaboratory.primitivio.PipeDataInputReader;
import com.milaboratory.primitivio.PrimitivI;
import com.milaboratory.primitivio.PrimitivO;
import com.milaboratory.util.CanReportProgressAndStage;
import com.milaboratory.util.ObjectSerializer;
import com.milaboratory.util.Sorter;
import gnu.trove.list.array.TLongArrayList;
import io.repseq.core.GeneFeature;
import io.repseq.core.GeneType;
import io.repseq.core.VDJCGene;
import org.apache.commons.io.output.CountingOutputStream;
Expand Down Expand Up @@ -103,9 +101,7 @@ public ClnAWriter(File file) throws IOException {
/**
* Step 1
*/
public synchronized void writeClones(CloneSet cloneSet,
VDJCAlignerParameters alignerParameters,
CloneAssemblerParameters assemblerParameters) {
public synchronized void writeClones(CloneSet cloneSet) {
// Checking state
if (clonesBlockFinished)
throw new IllegalArgumentException("Clone block was already written.");
Expand All @@ -124,13 +120,11 @@ public synchronized void writeClones(CloneSet cloneSet,
output.writeUTF(MiXCRVersionInfo.get()
.getVersionString(MiXCRVersionInfo.OutputType.ToFile));

// Writing aligner & assembler parameters
output.writeObject(alignerParameters);
output.writeObject(assemblerParameters);
// Writing aligner parameters
output.writeObject(cloneSet.alignmentParameters);

// Saving assembling features
GeneFeature[] assemblingFeatures = cloneSet.getAssemblingFeatures();
output.writeObject(assemblingFeatures);
// Writing assembler parameters
output.writeObject(cloneSet.assemblerParameters);

// Writing aligned gene features for each gene type
IO.writeGT2GFMap(output, cloneSet.alignedFeatures);
Expand Down
25 changes: 7 additions & 18 deletions src/main/java/com/milaboratory/mixcr/basictypes/Clone.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,37 +29,33 @@
package com.milaboratory.mixcr.basictypes;

import com.milaboratory.core.sequence.NSequenceWithQuality;
import com.milaboratory.core.sequence.SequencesUtils;
import com.milaboratory.primitivio.annotations.Serializable;
import io.repseq.core.GeneFeature;
import io.repseq.core.GeneType;

import java.util.EnumMap;
import java.util.Objects;

@Serializable(by = IO.CloneSerializer.class)
public final class Clone extends VDJCObject {
final GeneFeature[] assemblingFeatures;
final double count;
final int id;
CloneSet parent = null;

public Clone(NSequenceWithQuality[] targets, EnumMap<GeneType, VDJCHit[]> hits, GeneFeature[] assemblingFeatures, double count, int id) {
public Clone(NSequenceWithQuality[] targets, EnumMap<GeneType, VDJCHit[]> hits, double count, int id) {
super(hits, targets);
this.assemblingFeatures = assemblingFeatures;
this.count = count;
this.id = id;
}

public Clone setId(int id) {
Clone r = new Clone(targets, hits, assemblingFeatures, count, id);
Clone r = new Clone(targets, hits, count, id);
r.setParentCloneSet(parent);
return r;
}

/** Returns new instance with parent clone set set to null */
public Clone resetParentCloneSet() {
return new Clone(targets, hits, assemblingFeatures, count, id);
return new Clone(targets, hits, count, id);
}

public void setParentCloneSet(CloneSet set) {
Expand All @@ -68,6 +64,10 @@ public void setParentCloneSet(CloneSet set) {
this.parent = set;
}

public CloneSet getParentCloneSet() {
return parent;
}

public double getFraction() {
if (parent == null)
return Double.NaN;
Expand All @@ -78,21 +78,10 @@ public double getFraction(long totalCount) {
return 1.0 * count / totalCount;
}

public GeneFeature[] getAssemblingFeatures() {
return assemblingFeatures;
}

public double getCount() {
return count;
}

public NSequenceWithQuality getConcatenatedClonalSequence() {
NSequenceWithQuality[] seqs = new NSequenceWithQuality[assemblingFeatures.length];
for (int i = 0; i < assemblingFeatures.length; i++)
seqs[i] = getFeature(assemblingFeatures[i]);
return SequencesUtils.concatenate(seqs);
}

public int getId() {
return id;
}
Expand Down
30 changes: 19 additions & 11 deletions src/main/java/com/milaboratory/mixcr/basictypes/CloneSet.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
package com.milaboratory.mixcr.basictypes;

import cc.redberry.primitives.Filter;
import com.milaboratory.mixcr.assembler.CloneAssemblerParameters;
import com.milaboratory.mixcr.vdjaligners.VDJCAlignerParameters;
import io.repseq.core.GeneFeature;
import io.repseq.core.GeneType;
import io.repseq.core.VDJCGene;
Expand All @@ -41,39 +43,36 @@
*/
public final class CloneSet implements Iterable<Clone> {
String versionInfo;
final GeneFeature[] assemblingFeatures;
final CloneAssemblerParameters assemblerParameters;
final VDJCAlignerParameters alignmentParameters;
final EnumMap<GeneType, GeneFeature> alignedFeatures;
final List<VDJCGene> usedGenes;
final List<Clone> clones;
final long totalCount;

public CloneSet(List<Clone> clones, Collection<VDJCGene> usedGenes, EnumMap<GeneType, GeneFeature> alignedFeatures,
GeneFeature[] assemblingFeatures) {
VDJCAlignerParameters alignmentParameters, CloneAssemblerParameters assemblerParameters) {
this.clones = Collections.unmodifiableList(new ArrayList<>(clones));
long totalCount = 0;
for (Clone clone : clones) {
totalCount += clone.count;
clone.setParentCloneSet(this);
}
this.alignedFeatures = alignedFeatures.clone();
this.alignmentParameters = alignmentParameters;
this.assemblerParameters = assemblerParameters;
this.usedGenes = Collections.unmodifiableList(new ArrayList<>(usedGenes));
this.totalCount = totalCount;
this.assemblingFeatures = assemblingFeatures;
}

public CloneSet(List<Clone> clones) {
this.clones = Collections.unmodifiableList(new ArrayList<>(clones));
long totalCount = 0;
HashMap<VDJCGeneId, VDJCGene> genes = new HashMap<>();
EnumMap<GeneType, GeneFeature> alignedFeatures = new EnumMap<>(GeneType.class);
GeneFeature[] assemblingFeatures = null;
for (Clone clone : clones) {
totalCount += clone.count;
clone.setParentCloneSet(this);
if (assemblingFeatures == null)
assemblingFeatures = clone.getAssemblingFeatures();
else if (!Arrays.equals(assemblingFeatures, clone.getAssemblingFeatures()))
throw new IllegalArgumentException("Different assembling features.");
for (GeneType geneType : GeneType.values())
for (VDJCHit hit : clone.getHits(geneType)) {
VDJCGene gene = hit.getGene();
Expand All @@ -84,8 +83,9 @@ else if (!Arrays.equals(assemblingFeatures, clone.getAssemblingFeatures()))
throw new IllegalArgumentException("Different aligned feature for clones.");
}
}
this.assemblingFeatures = assemblingFeatures;
this.alignedFeatures = alignedFeatures;
this.assemblerParameters = null;
this.alignmentParameters = null;
this.usedGenes = Collections.unmodifiableList(new ArrayList<>(genes.values()));
this.totalCount = totalCount;
}
Expand All @@ -103,7 +103,15 @@ public int size() {
}

public GeneFeature[] getAssemblingFeatures() {
return assemblingFeatures;
return assemblerParameters.getAssemblingFeatures();
}

public CloneAssemblerParameters getAssemblerParameters() {
return assemblerParameters;
}

public VDJCAlignerParameters getAlignmentParameters() {
return alignmentParameters;
}

public List<VDJCGene> getUsedGenes() {
Expand Down Expand Up @@ -143,6 +151,6 @@ public static CloneSet transform(CloneSet in, Filter<Clone> filter) {
newClones.add(c);
}
}
return new CloneSet(newClones, in.usedGenes, in.alignedFeatures, in.assemblingFeatures);
return new CloneSet(newClones, in.usedGenes, in.alignedFeatures, in.alignmentParameters, in.assemblerParameters);
}
}

0 comments on commit 5a9a3a5

Please sign in to comment.