Skip to content

Commit

Permalink
This fixes #6
Browse files Browse the repository at this point in the history
  • Loading branch information
PoslavskySV committed Sep 1, 2015
1 parent 843bcd6 commit f2f1f1d
Show file tree
Hide file tree
Showing 8 changed files with 284 additions and 31 deletions.
4 changes: 3 additions & 1 deletion CHANGELOG_CURRENT
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
Added export options -vHitsWithoutScores (same for J and D genes)

Added compression for .vdjca and .clns files (when specifying additional ".gz" extension: e.g. "mixcr align inut.fastq output.vdjca.gz" etc.)
Added compression for .vdjca and .clns files (when specifying additional ".gz" extension: e.g. "mixcr align inut.fastq output.vdjca.gz" etc.)

Fixed #6
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
import com.milaboratory.mixcr.basictypes.VDJCHit;
import com.milaboratory.mixcr.reference.*;
import com.milaboratory.mixcr.vdjaligners.SingleDAligner;
import com.milaboratory.mixcr.vdjaligners.VDJCAligner;
import gnu.trove.iterator.TObjectFloatIterator;
import gnu.trove.list.array.TFloatArrayList;
import gnu.trove.map.hash.TObjectFloatHashMap;
Expand Down Expand Up @@ -220,7 +221,9 @@ Clone create(int id, CloneAccumulator accumulator) {

if (from < to)
hits.put(GeneType.Diversity,
dAligner.align(sequenceToAlign, from, to, indexOfAssemblingFeatureWithD,
dAligner.align(sequenceToAlign,
VDJCAligner.getPossibleDLoci(hits.get(GeneType.Variable), hits.get(GeneType.Joining)),
from, to, indexOfAssemblingFeatureWithD,
assemblingFeatures.length));
else
hits.put(GeneType.Diversity, new VDJCHit[0]);
Expand Down
194 changes: 194 additions & 0 deletions src/main/java/com/milaboratory/mixcr/util/RunMiXCR.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
package com.milaboratory.mixcr.util;

import cc.redberry.pipe.CUtils;
import cc.redberry.pipe.OutputPort;
import cc.redberry.pipe.OutputPortCloseable;
import cc.redberry.pipe.blocks.ParallelProcessor;
import cc.redberry.pipe.util.Chunk;
import cc.redberry.pipe.util.Indexer;
import cc.redberry.pipe.util.OrderedOutputPort;
import com.milaboratory.core.io.sequence.SequenceRead;
import com.milaboratory.core.io.sequence.SequenceReaderCloseable;
import com.milaboratory.core.io.sequence.fasta.FastaReader;
import com.milaboratory.core.io.sequence.fasta.FastaSequenceReaderWrapper;
import com.milaboratory.core.io.sequence.fastq.PairedFastqReader;
import com.milaboratory.core.io.sequence.fastq.SingleFastqReader;
import com.milaboratory.core.sequence.NucleotideSequence;
import com.milaboratory.mixcr.assembler.*;
import com.milaboratory.mixcr.basictypes.CloneSet;
import com.milaboratory.mixcr.basictypes.VDJCAlignments;
import com.milaboratory.mixcr.cli.AlignerReport;
import com.milaboratory.mixcr.cli.CloneAssemblerReport;
import com.milaboratory.mixcr.reference.*;
import com.milaboratory.mixcr.vdjaligners.VDJCAligner;
import com.milaboratory.mixcr.vdjaligners.VDJCAlignerParameters;
import com.milaboratory.mixcr.vdjaligners.VDJCAlignmentResult;
import com.milaboratory.mixcr.vdjaligners.VDJCParametersPresets;
import com.milaboratory.util.CanReportProgress;
import com.milaboratory.util.SmartProgressReporter;

import java.io.IOException;
import java.util.ArrayList;
import java.util.EnumSet;
import java.util.List;
import java.util.Set;

import static cc.redberry.pipe.CUtils.chunked;
import static cc.redberry.pipe.CUtils.unchunked;

/**
* @author Dmitry Bolotin
* @author Stanislav Poslavsky
*/
public final class RunMiXCR {

public static AssembleResult assemble(final AlignResult align) {
RunMiXCRAnalysis parameters = align.parameters;
try (CloneAssembler assembler = new CloneAssembler(parameters.cloneAssemblerParameters,
false, align.usedAlleles)) {

CloneAssemblerReport report = new CloneAssemblerReport();
assembler.setListener(report);

CloneAssemblerRunner assemblerRunner = new CloneAssemblerRunner(new AlignmentsProvider() {
@Override
public OutputPortCloseable<VDJCAlignments> create() {
return opCloseable(CUtils.asOutputPort(align.alignments));
}

@Override
public long getTotalNumberOfReads() {
return align.alignments.size();
}
}, assembler, parameters.threads);

//start progress reporting
SmartProgressReporter.startProgressReport(assemblerRunner);

assemblerRunner.run();

CloneSet cloneSet = assemblerRunner.getCloneSet();
return new AssembleResult(cloneSet, report);
}
}

public static AlignResult align(String... files) throws Exception {
return align(new RunMiXCRAnalysis(files));
}

public static AlignResult align(RunMiXCRAnalysis parameters) throws Exception {
VDJCAlignerParameters alignerParameters = parameters.alignerParameters;

VDJCAligner aligner = VDJCAligner.createAligner(alignerParameters,
parameters.isInputPaired(), alignerParameters.getMergerParameters() != null);

LociLibrary ll = LociLibraryManager.getDefault().getLibrary("mi");

List<Allele> alleles = new ArrayList<>();
for (Locus locus : parameters.loci)
for (Allele allele : ll.getLocus(parameters.taxonId, locus).getAllAlleles())
if (alignerParameters.containsRequiredFeature(allele) &&
(allele.isFunctional() || !parameters.isFunctionalOnly)) {
alleles.add(allele);
aligner.addAllele(allele);
}

AlignerReport report = new AlignerReport();
aligner.setEventsListener(report);

try (SequenceReaderCloseable<? extends SequenceRead> reader = parameters.createReader()) {

//start progress reporting
SmartProgressReporter.startProgressReport("align", (CanReportProgress) reader);

OutputPort<Chunk<SequenceRead>> mainInputReads = CUtils.buffered((OutputPort) chunked(reader, 64), 16);
OutputPort<VDJCAlignmentResult> alignments = unchunked(new ParallelProcessor(mainInputReads, chunked(aligner), parameters.threads));
List<VDJCAlignments> als = new ArrayList<>();
int ind = 0;
for (VDJCAlignmentResult t : CUtils.it(new OrderedOutputPort<>(alignments, new Indexer<VDJCAlignmentResult>() {
@Override
public long getIndex(VDJCAlignmentResult r) {
return r.read.getId();
}
}))) {
if (t.alignment != null) {
t.alignment.setAlignmentsIndex(ind++);
als.add(t.alignment);
}
}
return new AlignResult(parameters, reader.getNumberOfReads(), report, als, alleles);
}
}

public static final class AssembleResult {
final CloneSet cloneSet;
final CloneAssemblerReport report;

public AssembleResult(CloneSet cloneSet, CloneAssemblerReport report) {
this.cloneSet = cloneSet;
this.report = report;
}
}

public static final class AlignResult {
final RunMiXCRAnalysis parameters;
final long totalNumberOfReads;
final AlignerReport report;
final List<VDJCAlignments> alignments;
final List<Allele> usedAlleles;

public AlignResult(RunMiXCRAnalysis parameters, long totalNumberOfReads, AlignerReport report,
List<VDJCAlignments> alignments, List<Allele> usedAlleles) {
this.parameters = parameters;
this.totalNumberOfReads = totalNumberOfReads;
this.report = report;
this.alignments = alignments;
this.usedAlleles = usedAlleles;
}
}

public static final class RunMiXCRAnalysis {
public VDJCAlignerParameters alignerParameters = VDJCParametersPresets.getByName("default");
public CloneAssemblerParameters cloneAssemblerParameters = CloneAssemblerParametersPresets.getByName("default");
public Set<Locus> loci = EnumSet.allOf(Locus.class);
public int taxonId = Species.HomoSapiens;
public boolean isFunctionalOnly = true;
public int threads = Runtime.getRuntime().availableProcessors();
public final String[] inputFiles;

public RunMiXCRAnalysis(String... inputFiles) {
this.inputFiles = inputFiles;
}

public boolean isInputPaired() {
return inputFiles.length == 2;
}

public SequenceReaderCloseable<? extends SequenceRead> createReader() throws IOException {
if (isInputPaired())
return new PairedFastqReader(inputFiles[0], inputFiles[1], true);
else {
String[] s = inputFiles[0].split("\\.");
if (s[s.length - 1].equals("fasta"))
return new FastaSequenceReaderWrapper(
new FastaReader<>(inputFiles[0], NucleotideSequence.ALPHABET),
true);
else
return new SingleFastqReader(inputFiles[0], true);
}
}
}

private static <T> OutputPortCloseable<T> opCloseable(final OutputPort<T> op) {
return new OutputPortCloseable<T>() {
@Override
public void close() {
}

@Override
public T take() {
return op.take();
}
};
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -38,17 +38,19 @@
import com.milaboratory.mixcr.basictypes.VDJCHit;
import com.milaboratory.mixcr.reference.Allele;
import com.milaboratory.mixcr.reference.GeneFeature;
import com.milaboratory.mixcr.reference.Locus;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Set;
import java.util.concurrent.ExecutionException;

public final class SingleDAligner {
private final AlignmentScoring<NucleotideSequence> scoring;
private final float absoluteMinScore, relativeMinScore;
private final int maxHits;
private final List<NucleotideSequence> sequences = new ArrayList<>();
private final List<SequenceWithLocus> sequences = new ArrayList<>();
private final List<Allele> alleles;
private final GeneFeature featureToAlign;

Expand All @@ -72,11 +74,11 @@ public SingleDAligner(DAlignerParameters parameters,
this.maxHits = parameters.getMaxHits();
this.featureToAlign = parameters.getGeneFeatureToAlign();
for (Allele allele : alleles)
sequences.add(allele.getFeature(featureToAlign));
sequences.add(new SequenceWithLocus(allele, featureToAlign));
this.alleles = new ArrayList<>(alleles);
}

List<PreVDJCHit> align0(NucleotideSequence sequence, int from, int to) {
List<PreVDJCHit> align0(NucleotideSequence sequence, Set<Locus> loci, int from, int to) {
if (from > to)
throw new IllegalArgumentException();

Expand All @@ -88,33 +90,33 @@ List<PreVDJCHit> align0(NucleotideSequence sequence, int from, int to) {

PreVDJCHit h;
for (PreVDJCHit hit : cachedResult) {
//filter non-possible loci
if (!loci.contains(sequences.get(hit.id).locus))
continue;

result.add(h = convert(hit, from));

assert sequence.getRange(h.alignment.getSequence2Range()).equals(
h.alignment
.getRelativeMutations()
.mutate(sequences.get(h.id)
.getRange(h.alignment.getSequence1Range()))
);
.mutate(sequences.get(h.id).sequence
.getRange(h.alignment.getSequence1Range())));
}

cutToScore(result);
return result;
} catch (ExecutionException e) {
throw new RuntimeException(e);
}
}

public VDJCHit[] align(NucleotideSequence sequence, int from, int to,
public VDJCHit[] align(NucleotideSequence sequence, Set<Locus> loci, int from, int to,
int targetIndex, int numberOfTargets) {
List<PreVDJCHit> preHits = align0(sequence, from, to);
List<PreVDJCHit> preHits = align0(sequence, loci, from, to);
return PreVDJCHit.convert(alleles, featureToAlign, preHits,
targetIndex, numberOfTargets);
}

public VDJCHit[] align(NucleotideSequence sequence, int from, int to) {
return align(sequence, from, to, 0, 1);
}

private PreVDJCHit convert(PreVDJCHit hit, int from) {
Alignment<NucleotideSequence> alignment = hit.alignment;
return new PreVDJCHit(hit.id, new Alignment<>(alignment.getSequence1(),
Expand All @@ -131,26 +133,35 @@ private List<PreVDJCHit> _align(NucleotideSequence sequence) {
List<PreVDJCHit> result = new ArrayList<>();
Alignment<NucleotideSequence> alignment;
for (int i = 0; i < sequences.size(); ++i) {
alignment = Aligner.alignLocal(scoring, sequences.get(i), sequence);
alignment = Aligner.alignLocal(scoring, sequences.get(i).sequence, sequence);

if (alignment == null || alignment.getScore() < absoluteMinScore)
continue;

result.add(new PreVDJCHit(i, alignment));
}

if (result.isEmpty())
return result;

Collections.sort(result, PreVDJCHit.SCORE_COMPARATOR);
return result;
}

private void cutToScore(List<PreVDJCHit> result) {
if (result.isEmpty())
return;
float threshold = Math.max(absoluteMinScore, result.get(0).alignment.getScore() * relativeMinScore);

for (int i = result.size() - 1; i >= 0; --i)
if (result.get(i).alignment.getScore() < threshold
|| i >= maxHits)
result.remove(i);
}

return result;
private static final class SequenceWithLocus {
private final NucleotideSequence sequence;
private final Locus locus;

public SequenceWithLocus(Allele allele, GeneFeature featureToAlign) {
this.sequence = allele.getFeature(featureToAlign);
this.locus = allele.getLocus();
}
}
}
16 changes: 12 additions & 4 deletions src/main/java/com/milaboratory/mixcr/vdjaligners/VDJCAligner.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,12 @@
import cc.redberry.pipe.Processor;
import com.milaboratory.core.io.sequence.SequenceRead;
import com.milaboratory.mixcr.basictypes.VDJCAlignments;
import com.milaboratory.mixcr.basictypes.VDJCHit;
import com.milaboratory.mixcr.reference.Allele;
import com.milaboratory.mixcr.reference.GeneType;
import com.milaboratory.mixcr.reference.Locus;

import java.util.ArrayList;
import java.util.Collections;
import java.util.EnumMap;
import java.util.List;
import java.util.*;

public abstract class VDJCAligner<R extends SequenceRead> implements Processor<R, VDJCAlignmentResult<R>> {
protected volatile boolean initialized = false;
Expand Down Expand Up @@ -124,4 +123,13 @@ public static VDJCAligner createAligner(VDJCAlignerParameters alignerParameters,
: new VDJCAlignerPVFirst(alignerParameters)
: new VDJCAlignerSJFirst(alignerParameters);
}

public static Set<Locus> getPossibleDLoci(VDJCHit[] vHits, VDJCHit[] jHits) {
EnumSet loci = EnumSet.noneOf(Locus.class);
for (VDJCHit h : vHits)
loci.add(h.getAllele().getLocus());
for (VDJCHit h : jHits)
loci.add(h.getAllele().getLocus());
return loci;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,8 @@ VDJCAlignments createResult(long readId, VDJCAlignerPVFirst aligner) {
to = jAlignment.getSequence2Range().getFrom();
if (from > to)
continue;
List<PreVDJCHit> temp = singleDAligner.align0(target.targets[i].getSequence(), from, to);
List<PreVDJCHit> temp = singleDAligner.align0(target.targets[i].getSequence(),
getPossibleDLoci(vHits,jHits), from, to);
preDHits[i] = temp.toArray(new PreVDJCHit[temp.size()]);
}

Expand Down

0 comments on commit f2f1f1d

Please sign in to comment.