Skip to content

Commit

Permalink
Added UTR5BeginTrimmed reference point. ActivationPoint of alignment-…
Browse files Browse the repository at this point in the history
…attached reference point added to correctly detect certain ref.points. Magic bytes for VDJCA / CLNS files changed due to binary format change. Minor corrections.

This fixes #34
  • Loading branch information
dbolotin committed Sep 24, 2015
1 parent 134c3e9 commit 35d9562
Show file tree
Hide file tree
Showing 12 changed files with 138 additions and 36 deletions.
4 changes: 3 additions & 1 deletion CHANGELOG_CURRENT
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
New feature: optional short column names in `export...` action to simplify further data analysis using data table processing libraries like Pandas or R/DataFrames. (`-s` / `--no-spaces` in `exportAlignments` and `exportClones`)
Added `UTR5BeginTrimmed` reference point
minor: some column names in output tab-delimited files slightly changed
minor: NPE in exportAlignmentsPretty fixed
minor: NPE in exportAlignmentsPretty fixed
minor: New reference points added to exportAlignmentsPretty output
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
import java.util.List;

public final class CloneSetIO {
static final String MAGIC = "MiXCR.CLNS.V01";
static final String MAGIC = "MiXCR.CLNS.V02";
static final int MAGIC_LENGTH = 14;
static final byte[] MAGIC_BYTES = MAGIC.getBytes(StandardCharsets.US_ASCII);

Expand Down Expand Up @@ -148,7 +148,7 @@ public static CloneSet read(InputStream inputStream, AlleleResolver alleleResolv
input.readFully(magicBytes);

if (!Arrays.equals(magicBytes, MAGIC_BYTES))
throw new RuntimeException("Wrong file format.");
throw new RuntimeException("Unsupported file format; .clns file of version " + new String(magicBytes) + " while you are running MiXCR " + MAGIC);

GeneFeature[] assemblingFeatures = input.readObject(GeneFeature[].class);
EnumMap<GeneType, GeneFeature> alignedFeatures = IO.readGF2GTMap(input);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,21 @@ public int getPosition(ReferencePoint referencePoint) {
Alignment<NucleotideSequence> alignment = hit.getAlignment(targetIndex);
if (alignment == null)
return -1;
if (referencePoint.isAttachedToLeftAlignmentBound())

int positionOfActivationPoint = -2;
if (referencePoint.getActivationPoint() != null)
positionOfActivationPoint = hit.getAllele().getPartitioning()
.getRelativePosition(hit.getAlignedFeature(), referencePoint.getActivationPoint());

if (referencePoint.isAttachedToLeftAlignmentBound()) {
positionInSeq1 = alignment.getSequence1Range().getFrom();
else
if (positionOfActivationPoint != -2 && (positionOfActivationPoint == -1 || positionInSeq1 > positionOfActivationPoint))
return -1;
} else {
positionInSeq1 = alignment.getSequence1Range().getTo();
if (positionOfActivationPoint != -2 && (positionOfActivationPoint == -1 || positionInSeq1 < positionOfActivationPoint))
return -1;
}
positionInSeq1 += referencePoint.getOffset();
position = alignment.convertPosition(positionInSeq1);
} else
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ public static MultiAlignmentHelper getTargetAsMultiAlignment(VDJCAlignments vdjc
}

public static final PointToDraw[] points = new PointToDraw[]{
pd(ReferencePoint.UTR5BeginTrimmed, "<5'UTR"),
pd(ReferencePoint.UTR5End, "5'UTR><L1"),
pd(ReferencePoint.L1End, "L1>"),
pd(ReferencePoint.L2Begin, "<L2"),
Expand All @@ -95,6 +96,10 @@ public static MultiAlignmentHelper getTargetAsMultiAlignment(VDJCAlignments vdjc
pd(ReferencePoint.CDR2Begin, "FR2><CDR2"),
pd(ReferencePoint.FR3Begin, "CDR2><FR3"),
pd(ReferencePoint.CDR3Begin, "FR3><CDR3"),
pd(ReferencePoint.VEndTrimmed, "V>"),
pd(ReferencePoint.DBeginTrimmed, "<D"),
pd(ReferencePoint.DEndTrimmed, "D>"),
pd(ReferencePoint.JBeginTrimmed, "<J"),
pd(ReferencePoint.CDR3End, "CDR3><FR4"),
pd(ReferencePoint.FR4End, "FR4>")
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ public void init() {
byte[] magic = new byte[MAGIC_LENGTH];
input.readFully(magic);
if (!Arrays.equals(magic, MAGIC_BYTES))
throw new RuntimeException("Conflicting file format; .vdjca file of version " + new String(magic) + " while you are running MiXCR " + MAGIC);
throw new RuntimeException("Unsupported file format; .vdjca file of version " + new String(magic) + " while you are running MiXCR " + MAGIC);

parameters = input.readObject(VDJCAlignerParameters.class);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,21 +28,19 @@
*/
package com.milaboratory.mixcr.basictypes;

import com.milaboratory.core.io.CompressionType;
import com.milaboratory.mixcr.reference.Allele;
import com.milaboratory.mixcr.vdjaligners.VDJCAligner;
import com.milaboratory.mixcr.vdjaligners.VDJCAlignerParameters;
import com.milaboratory.primitivio.PrimitivO;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.util.List;

public final class VDJCAlignmentsWriter implements AutoCloseable {
static final String MAGIC = "MiXCR.VDJC.V03";
static final String MAGIC = "MiXCR.VDJC.V04";
static final int MAGIC_LENGTH = 14;
static final byte[] MAGIC_BYTES = MAGIC.getBytes(StandardCharsets.US_ASCII);
final PrimitivO output;
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/com/milaboratory/mixcr/reference/Allele.java
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ protected Allele(Gene gene, String name, boolean isFunctional) {
}

public final boolean isComplete() {
return gene.getGroup().getType().getCompleteNumberOfReferencePoints() == getPartitioning().numberOfDefinedPoins();
return gene.getGroup().getType().getCompleteNumberOfReferencePoints() == getPartitioning().numberOfDefinedPoints();
}

public final LocusContainer getLocusContainer() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,40 +34,56 @@
enum BasicReferencePoint implements java.io.Serializable {
// Points in V
UTR5Begin(0, GeneType.Variable, 0),
UTR5EndL1Begin(1, Variable, 1),
L1EndVIntronBegin(2, Variable, 2),
VIntronEndL2Begin(3, Variable, 3),
L2EndFR1Begin(4, Variable, 4),
FR1EndCDR1Begin(5, Variable, 5),
CDR1EndFR2Begin(6, Variable, 6),
FR2EndCDR2Begin(7, Variable, 7),
CDR2EndFR3Begin(8, Variable, 8),
FR3EndCDR3Begin(9, Variable, 9),
VEndTrimmed(-2, Variable, 10),
VEnd(10, Variable, 11),
UTR5BeginTrimmed(-1, GeneType.Variable, 1, "UTR5End"),
UTR5EndL1Begin(1, Variable, 2),
L1EndVIntronBegin(2, Variable, 3),
VIntronEndL2Begin(3, Variable, 4),
L2EndFR1Begin(4, Variable, 5),
FR1EndCDR1Begin(5, Variable, 6),
CDR1EndFR2Begin(6, Variable, 7),
FR2EndCDR2Begin(7, Variable, 8),
CDR2EndFR3Begin(8, Variable, 9),
FR3EndCDR3Begin(9, Variable, 10),
VEndTrimmed(-2, Variable, 11, "CDR3Begin(-3)"),
VEnd(10, Variable, 12),

// Points in D
DBegin(11, Diversity, 12),
DBeginTrimmed(-1, Diversity, 13),
DEndTrimmed(-2, Diversity, 14),
DEnd(12, Diversity, 15),
DBegin(11, Diversity, 13),
DBeginTrimmed(-1, Diversity, 14, null),
DEndTrimmed(-2, Diversity, 15, null),
DEnd(12, Diversity, 16),

// Points in J
JBegin(13, Joining, 16),
JBeginTrimmed(-1, Joining, 17),
CDR3EndFR4Begin(14, Joining, 18),
FR4End(15, Joining, 19),
JBegin(13, Joining, 17),
JBeginTrimmed(-1, Joining, 18, "CDR3End(+3)"),
CDR3EndFR4Begin(14, Joining, 19),
FR4End(15, Joining, 20),

// Points in C
CBegin(16, Constant, 20),
CExon1End(17, Constant, 21),
CEnd(18, Constant, 22);
CBegin(16, Constant, 21),
CExon1End(17, Constant, 22),
CEnd(18, Constant, 23);

final int orderingIndex;
final int index;
final GeneType geneType;
BasicReferencePoint trimmedVersion;

/* Only for trimmed (attached to alignment boundary) points */

// Defined for alignment boundary attached reference points
// E.g. UTR5BeginTrimmed is a left boundary of an alignment but only if it is on the left side of UTR5End/L1Begin
// Stored as a string (not as a ReferencePoint object) to avoid a cyclic dependency on ReferencePoint
final String activationPointString;
volatile ReferencePoint activationPoint;


/**
 * Creates a basic reference point without an activation point.
 * Delegates to the four-argument constructor, passing {@code null} as the activation point string.
 */
BasicReferencePoint(int index, GeneType geneType, int orderingIndex) {
this(index, geneType, orderingIndex, null);
}

BasicReferencePoint(int index, GeneType geneType, int orderingIndex, String activationPoint) {
this.activationPointString = activationPoint;
this.index = index;
this.geneType = geneType;
this.orderingIndex = orderingIndex;
Expand All @@ -90,6 +106,20 @@ public boolean isTrimmable() {
return trimmedVersion != null;
}

/**
 * Returns the activation point of this reference point, parsing it from
 * {@code activationPointString} on first access and caching the result.
 *
 * @return the parsed activation point, or {@code null} if no activation point string is defined
 */
public ReferencePoint getActivationPoint() {
    if (activationPointString == null)
        return null;

    // Fast path: the value has already been parsed and cached
    ReferencePoint cached = activationPoint;
    if (cached != null)
        return cached;

    synchronized (this) {
        // Re-check under the lock; another thread may have populated the cache meanwhile
        cached = activationPoint;
        if (cached == null) {
            cached = ReferencePoint.parse(activationPointString);
            activationPoint = cached;
        }
        return cached;
    }
}

private final static BasicReferencePoint[] allReferencePoints;
public static final int TOTAL_NUMBER_OF_REFERENCE_POINTS = 19;

Expand All @@ -106,6 +136,7 @@ public boolean isTrimmable() {
for (BasicReferencePoint rp : allReferencePoints)
assert rp != null;

UTR5Begin.trimmedVersion = UTR5BeginTrimmed;
VEnd.trimmedVersion = VEndTrimmed;
DBegin.trimmedVersion = DBeginTrimmed;
DEnd.trimmedVersion = DEndTrimmed;
Expand Down
17 changes: 15 additions & 2 deletions src/main/java/com/milaboratory/mixcr/reference/ReferencePoint.java
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,13 @@ public final class ReferencePoint implements Comparable<ReferencePoint>, java.io
*/
public static final ReferencePoint UTR5Begin = new ReferencePoint(BasicReferencePoint.UTR5Begin),
/**
* End of 5'UTR, beginning of IG/TCR CDS
* End of 5'UTR, beginning of IG/TCR CDS as listed in database
*/
UTR5End = new ReferencePoint(BasicReferencePoint.UTR5EndL1Begin),
/**
* Beginning of 5'UTR as observed in the data (left bound of the alignment, trimmed version of UTR5Begin)
*/
UTR5BeginTrimmed = new ReferencePoint(BasicReferencePoint.UTR5BeginTrimmed),
/**
* End of 5'UTR, beginning of IG/TCR CDS
*/
Expand Down Expand Up @@ -181,14 +185,19 @@ public final class ReferencePoint implements Comparable<ReferencePoint>, java.io
*/
CBegin = new ReferencePoint(BasicReferencePoint.CBegin),
/**
* End of C Region first exon (Exon 3)
* End of C Region first exon (Exon 3 of assembled TCR/IG gene)
*/
CExon1End = new ReferencePoint(BasicReferencePoint.CExon1End),
/**
* End of C Region
*/
CEnd = new ReferencePoint(BasicReferencePoint.CEnd);

/**
* Default set of reference points.
*/
public static final ReferencePoint[] DefaultReferencePoints = {};

static final long serialVersionUID = 1L;
final BasicReferencePoint basicPoint;
final int offset;
Expand Down Expand Up @@ -288,6 +297,10 @@ public ReferencePoint getWithoutOffset() {
return new ReferencePoint(basicPoint);
}

/**
 * Returns the activation point of the underlying basic reference point.
 *
 * @return the activation point, or {@code null} if the basic point defines none
 */
public ReferencePoint getActivationPoint() {
return basicPoint.getActivationPoint();
}

@Override
public int compareTo(ReferencePoint o) {
int c = basicPoint.compareTo(o.basicPoint);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ public ReferencePoints(int start, int[] points) {
this.points = array;
}

public int numberOfDefinedPoins() {
public int numberOfDefinedPoints() {
int ret = 0;
for (int point : points) {
if (point >= 0)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/*
* Copyright (c) 2014-2015, Bolotin Dmitry, Chudakov Dmitry, Shugay Mikhail
* (here and after addressed as Inventors)
* All Rights Reserved
*
* Permission to use, copy, modify and distribute any part of this program for
* educational, research and non-profit purposes, by non-profit institutions
* only, without fee, and without a written agreement is hereby granted,
* provided that the above copyright notice, this paragraph and the following
* three paragraphs appear in all copies.
*
* Those desiring to incorporate this work into commercial products or use for
* commercial purposes should contact the Inventors using one of the following
* email addresses: chudakovdm@mail.ru, chudakovdm@gmail.com
*
* IN NO EVENT SHALL THE INVENTORS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
* SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS,
* ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN IF THE INVENTORS HAS BEEN
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE INVENTORS HAS
* NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR
* MODIFICATIONS. THE INVENTORS MAKES NO REPRESENTATIONS AND EXTENDS NO
* WARRANTIES OF ANY KIND, EITHER IMPLIED OR EXPRESS, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
* PARTICULAR PURPOSE, OR THAT THE USE OF THE SOFTWARE WILL NOT INFRINGE ANY
* PATENT, TRADEMARK OR OTHER RIGHTS.
*/
package com.milaboratory.mixcr.reference;

import org.junit.Assert;
import org.junit.Test;

/**
 * Checks that selected alignment-attached (trimmed) reference points expose
 * a non-null activation point.
 */
public class ReferencePointTest {
    @Test
    public void test1() throws Exception {
        final ReferencePoint[] trimmedPoints = {
                ReferencePoint.UTR5BeginTrimmed,
                ReferencePoint.VEndTrimmed,
                ReferencePoint.JBeginTrimmed
        };
        for (int i = 0; i < trimmedPoints.length; ++i)
            Assert.assertNotNull(trimmedPoints[i].getActivationPoint());
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ public void test1() throws Exception {
VJAlignmentOrder.JThenV,
false, false,
120.0f, 5, 0.7f, 0.7f, PairedEndReadsLayout.Opposite, new MergerParameters(
QualityMergingAlgorithm.SumSubtraction, null, 12, 0.12));
QualityMergingAlgorithm.SumSubtraction, null, 12, null, 0.12));
String str = GlobalObjectMappers.PRETTY.writeValueAsString(paramentrs);
VDJCAlignerParameters deser = GlobalObjectMappers.PRETTY.readValue(str, VDJCAlignerParameters.class);
assertEquals(paramentrs, deser);
Expand Down

0 comments on commit 35d9562

Please sign in to comment.