Skip to content
Permalink
Browse files

Smart floating bounds in VDJAligner PV-first (#525)

* Range island detection done.

* Correction for default trimming quality threshold in contig assembly.

* milib submodule upgrade

* ReadTrimming option on alignment stage.

* MiLib submodule upgrade.

* Fixes error in FullSeqAssembleTest.

* Fix for null listener in PVFirst.

* fix bug in PVfirst regarding kAligner2 finest resutls

* temp fix bug due to indels in homopolymers on the boundaries of CDR3

* chery pick

* Added `exportAlignmentsForClones` action

* Fix tricky bug in contig aligners

* update submodule repseqio

* shift indels at homopolymers by default

* FullSeqAssembler: Indices mapping problem + Assertion exception (#536)

* First attempt.

* This fixes assertion exception appearing when the sequence assembled for the assembing region is not equal to the clonal sequence.
  • Loading branch information...
PoslavskySV authored and dbolotin committed Jul 21, 2019
1 parent fc53271 commit 94b72fcfa41e313b6701b4cdc64a3837d71ca123
Showing with 852 additions and 163 deletions.
  1. +2 −1 CHANGELOG_CURRENT
  2. +5 −0 itests.sh
  3. +1 −1 milib
  4. +1 −1 repseqio
  5. +20 −13 src/main/java/com/milaboratory/mixcr/assembler/CloneAssembler.java
  6. +81 −0 src/main/java/com/milaboratory/mixcr/assembler/fullseq/CoverageAccumulator.java
  7. +323 −98 src/main/java/com/milaboratory/mixcr/assembler/fullseq/FullSeqAssembler.java
  8. +21 −0 src/main/java/com/milaboratory/mixcr/assembler/fullseq/FullSeqAssemblerReport.java
  9. +2 −7 src/main/java/com/milaboratory/mixcr/basictypes/ClnAWriter.java
  10. +15 −6 src/main/java/com/milaboratory/mixcr/basictypes/VDJCAlignments.java
  11. +1 −1 src/main/java/com/milaboratory/mixcr/basictypes/VDJCHit.java
  12. +42 −1 src/main/java/com/milaboratory/mixcr/basictypes/VDJCObject.java
  13. +56 −0 src/main/java/com/milaboratory/mixcr/cli/AlignerReport.java
  14. +27 −2 src/main/java/com/milaboratory/mixcr/cli/CommandAlign.java
  15. +56 −2 src/main/java/com/milaboratory/mixcr/cli/CommandAssembleContigs.java
  16. +102 −0 src/main/java/com/milaboratory/mixcr/cli/CommandExportAlignmentsForClones.java
  17. +7 −0 src/main/java/com/milaboratory/mixcr/cli/CommandExportAlignmentsPretty.java
  18. +1 −1 src/main/java/com/milaboratory/mixcr/cli/CommandExtend.java
  19. +1 −0 src/main/java/com/milaboratory/mixcr/cli/Main.java
  20. +1 −1 src/main/java/com/milaboratory/mixcr/partialassembler/PartialAlignmentsAssembler.java
  21. +1 −6 src/main/java/com/milaboratory/mixcr/vdjaligners/VDJCAligner.java
  22. +5 −2 src/main/java/com/milaboratory/mixcr/vdjaligners/VDJCAlignerEventListener.java
  23. +43 −7 src/main/java/com/milaboratory/mixcr/vdjaligners/VDJCAlignerPVFirst.java
  24. +16 −3 src/main/java/com/milaboratory/mixcr/vdjaligners/VDJCAlignerParameters.java
  25. +6 −0 src/main/java/com/milaboratory/mixcr/vdjaligners/VDJCAlignmentResult.java
  26. +1 −1 src/main/resources/parameters/full_seq_assembler_parameters.json
  27. +6 −3 src/main/resources/parameters/vdjcaligner_parameters.json
  28. +4 −4 src/test/java/com/milaboratory/mixcr/assembler/fullseq/FullSeqAssemblerTest.java
  29. +4 −1 src/test/java/com/milaboratory/mixcr/basictypes/VDJCObjectTest.java
  30. +1 −1 src/test/java/com/milaboratory/mixcr/vdjaligners/VDJCAlignerParametersTest.java
@@ -1 +1,2 @@
Fixes exception in `-mutationsDetailed` and similar export options
Fixes exception in `-mutationsDetailed` and similar export options
Added `exportAlignmentsForClones` action
@@ -103,6 +103,11 @@ if [[ $create_standard_results == true ]]; then
go_assemble ${s}_paired
mixcr align -s hs -r ${s}_single.vdjca.report ${s}_R1.fastq ${s}_single.vdjca
go_assemble ${s}_single

mixcr align -s hs -p kAligner2 -r ${s}_paired.vdjca.report ${s}_R1.fastq ${s}_R2.fastq ${s}_paired2.vdjca
go_assemble ${s}_paired2
mixcr align -s hs -p kAligner2 -r ${s}_single.vdjca.report ${s}_R1.fastq ${s}_single2.vdjca
go_assemble ${s}_single2
done
fi

2 milib
Submodule milib updated 20 files
+1 −1 pom.xml
+4 −0 src/main/java/com/milaboratory/core/alignment/batch/BatchAlignerWithBaseWithFilter.java
+37 −2 src/main/java/com/milaboratory/core/alignment/kaligner1/KAligner.java
+50 −0 src/main/java/com/milaboratory/core/alignment/kaligner1/KMapper.java
+40 −3 src/main/java/com/milaboratory/core/alignment/kaligner2/KAligner2.java
+56 −0 src/main/java/com/milaboratory/core/alignment/kaligner2/KMapper2.java
+20 −1 src/main/java/com/milaboratory/core/io/sequence/MultiRead.java
+20 −0 src/main/java/com/milaboratory/core/io/sequence/PairedRead.java
+18 −0 src/main/java/com/milaboratory/core/io/sequence/SequenceRead.java
+12 −0 src/main/java/com/milaboratory/core/io/sequence/SequenceReadUtil.java
+21 −2 src/main/java/com/milaboratory/core/io/sequence/SingleRead.java
+20 −0 src/main/java/com/milaboratory/core/sequence/quality/FunctionWithIndex.java
+151 −8 src/main/java/com/milaboratory/core/sequence/quality/QualityTrimmer.java
+2 −0 src/main/java/com/milaboratory/core/sequence/quality/QualityTrimmerParameters.java
+23 −0 src/main/java/com/milaboratory/core/sequence/quality/ReadTrimmerListener.java
+64 −0 src/main/java/com/milaboratory/core/sequence/quality/ReadTrimmerProcessor.java
+97 −0 src/main/java/com/milaboratory/core/sequence/quality/ReadTrimmerReport.java
+6 −0 src/main/java/com/milaboratory/util/ArraysUtils.java
+29 −0 src/main/java/com/milaboratory/util/StreamUtil.java
+121 −0 src/test/java/com/milaboratory/core/sequence/quality/QualityTrimmerTest.java
@@ -284,7 +284,7 @@ public void runClustering() {
if (clusteredClonesAccumulators != null)
throw new IllegalStateException("Already clustered.");
if (!preClusteringDone)
throw new IllegalStateException("No preclustering done.");
throw new IllegalStateException("No pre-clustering is done.");

@SuppressWarnings("unchecked")
Clustering clustering = new Clustering(cloneList,
@@ -329,7 +329,7 @@ public long getAlignmentsCount() {

public void buildClones() {
if (!preClusteringDone)
throw new IllegalStateException("No preclustering done.");
throw new IllegalStateException("No pre-clustering is done.");
ClonesBuilder builder = new ClonesBuilder();
progressReporter = builder;
builder.buildClones();
@@ -532,7 +532,7 @@ public boolean allowMutation(NucleotideSequence reference, int position,
volatile int progress;

private ClonesBuilder() {
this.sourceSize = clusteredClonesAccumulators != null ? clusteredClonesAccumulators.size() : clones.size();
this.sourceSize = clusteredClonesAccumulators != null ? clusteredClonesAccumulators.size() : cloneList.size();
}

@Override
@@ -572,8 +572,14 @@ void buildClones() {
else {
for (TIntIntIterator it = idMapping.iterator(); it.hasNext(); ) {
it.advance();
if (newIdMapping.containsKey(it.value()))
it.setValue(newIdMapping.get(it.value()));
int val = it.value();
if (val >= 0) { // "renaming" normal clonotypes
// if (newIdMapping.containsKey(val))
it.setValue(newIdMapping.get(val));
} else { // "renaming" clustered clonotypes
// if (newIdMapping.containsKey(~val))
it.setValue(newIdMapping.get(~val));
}
}
}
source = Arrays.asList(sourceArray);
@@ -671,11 +677,11 @@ synchronized CloneAccumulator accumulate(ClonalSequence sequence, VDJCAlignments
// Score filtering step

// Calculation
float[] maxScores = new float[3];
float[] maxScores = new float[2];
for (CloneAccumulator acc : accs) {
if (acc == null)
continue;
for (int i = 0; i < 3; i++) {
for (int i = 0; i < 2; i++) { // Only for V and J
GeneType gt = GeneType.VJC_REFERENCE[i];
maxScores[i] =
parameters.getSeparateBy(gt)
@@ -684,7 +690,7 @@ synchronized CloneAccumulator accumulate(ClonalSequence sequence, VDJCAlignments
}
}

for (int i = 0; i < 3; i++)
for (int i = 0; i < 2; i++) // Only for V and J
maxScores[i] /= parameters.preClusteringScoreFilteringRatio;

// Filtering low score clonotypes
@@ -693,11 +699,12 @@ synchronized CloneAccumulator accumulate(ClonalSequence sequence, VDJCAlignments
if (accs[i] == null)
continue;

for (int j = 0; j < 3; j++) {
for (int j = 0; j < 2; j++) { // Only for V and J
if (accs[i].getBestGene(GeneType.VJC_REFERENCE[j]) != null &&
accs[i].getBestScore(GeneType.VJC_REFERENCE[j]) < maxScores[j]) {
dropped.accept(accs[i]);
accs[i] = null;
++deleted;
break;
}
}
@@ -706,23 +713,23 @@ synchronized CloneAccumulator accumulate(ClonalSequence sequence, VDJCAlignments
// Filtering low quality clonotypes
List<CloneAccumulator> result = new ArrayList<>(accs.length - deleted);

out:
for (CloneAccumulator acc : accs) {
// null marks clustered clonotypes
if (acc == null)
continue;

acc.rebuildClonalSequence();

if (acc.getSequence().getConcatenated().getQuality().minValue() <
parameters.minimalQuality) {
if (acc.getSequence().getConcatenated().getQuality().minValue() < parameters.minimalQuality) {
dropped.accept(acc);
continue out;
continue;
}

result.add(acc);
}

assert result.size() == accs.length - deleted;

return result;
}

@@ -0,0 +1,81 @@
/*
* Copyright (c) 2014-2019, Bolotin Dmitry, Chudakov Dmitry, Shugay Mikhail
* (here and after addressed as Inventors)
* All Rights Reserved
*
* Permission to use, copy, modify and distribute any part of this program for
* educational, research and non-profit purposes, by non-profit institutions
* only, without fee, and without a written agreement is hereby granted,
* provided that the above copyright notice, this paragraph and the following
* three paragraphs appear in all copies.
*
* Those desiring to incorporate this work into commercial products or use for
* commercial purposes should contact MiLaboratory LLC, which owns exclusive
* rights for distribution of this program for commercial purposes, using the
* following email address: licensing@milaboratory.com.
*
* IN NO EVENT SHALL THE INVENTORS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
* SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS,
* ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN IF THE INVENTORS HAS BEEN
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE INVENTORS HAS
* NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR
* MODIFICATIONS. THE INVENTORS MAKES NO REPRESENTATIONS AND EXTENDS NO
* WARRANTIES OF ANY KIND, EITHER IMPLIED OR EXPRESS, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
* PARTICULAR PURPOSE, OR THAT THE USE OF THE SOFTWARE WILL NOT INFRINGE ANY
* PATENT, TRADEMARK OR OTHER RIGHTS.
*/
package com.milaboratory.mixcr.assembler.fullseq;

import com.milaboratory.core.Range;
import com.milaboratory.core.alignment.Alignment;
import com.milaboratory.core.sequence.NucleotideSequence;
import com.milaboratory.mixcr.basictypes.VDJCHit;
import gnu.trove.map.hash.TLongIntHashMap;

import java.util.Arrays;
import java.util.DoubleSummaryStatistics;

public final class CoverageAccumulator {
public final VDJCHit hit;
private final long[] coverage;

public CoverageAccumulator(VDJCHit hit) {
this.hit = hit;
final int geneLength = hit.getGene().getPartitioning().getLength(hit.getAlignedFeature());
this.coverage = new long[geneLength];
}

public void accumulate(VDJCHit hit) {
for (int targetId = 0; targetId < hit.numberOfTargets(); targetId++) {
Alignment<NucleotideSequence> al = hit.getAlignment(targetId);
if (al == null)
continue;
Range coveredRange = al.getSequence1Range();
for (int i = coveredRange.getLower(); i < coveredRange.getUpper(); i++)
coverage[i]++;
}
}

public DoubleSummaryStatistics getStat() {
return Arrays.stream(coverage).mapToDouble(i -> i).summaryStatistics();
}

private final TLongIntHashMap nocCache = new TLongIntHashMap();

public int getNumberOfCoveredPoints(long threshold) {
if (nocCache.containsKey(threshold))
return nocCache.get(threshold);
else {
int c = (int) Arrays.stream(coverage).filter(i -> i >= threshold).count();
nocCache.put(threshold, c);
return c;
}
}

public double getFractionOfCoveredPoints(long threshold) {
return 1.0 * getNumberOfCoveredPoints(threshold) / coverage.length;
}
}

0 comments on commit 94b72fc

Please sign in to comment.
You can’t perform that action at this time.