Permalink
Browse files

Implementation of uber action (analyze) and enhancement of MiXCR binary format (store file history) (#395)

* save

* init

* Minor cosmetic:
 - failed to implement subcommands, since the usage printed with "-h" is awful
 - hide some parameters for rep-seq action

* Saving analysis history in binary files on road

* update milib submodule

* fixing tests

* Storing full pipeline configuration in all MiXCR binary files almost done (tests needed).

* Simple action to print pipeline info for binary file

* Final implementation of pipeline configurations in binary files & uber action

* update milib submodule

* minor

* update for new JCommander

* further fixes & b-cell option

* update repseq dep

* Minor fixes & enhancements. This fixes #398
  • Loading branch information...
PoslavskySV authored and dbolotin committed Jul 16, 2018
1 parent baebcff commit e18cd32e7786d91941ad72f512032232c20dd67d
Showing with 1,892 additions and 430 deletions.
  1. +1 −1 milib
  2. +1 −1 pom.xml
  3. +1 −1 repseqio
  4. +32 −0 src/main/java/com/milaboratory/mixcr/basictypes/ActionConfiguration.java
  5. +13 −3 src/main/java/com/milaboratory/mixcr/basictypes/ClnAReader.java
  6. +14 −9 src/main/java/com/milaboratory/mixcr/basictypes/ClnAWriter.java
  7. +129 −0 src/main/java/com/milaboratory/mixcr/basictypes/ClnsReader.java
  8. +96 −0 src/main/java/com/milaboratory/mixcr/basictypes/ClnsWriter.java
  9. +13 −164 src/main/java/com/milaboratory/mixcr/basictypes/CloneSetIO.java
  10. +187 −0 src/main/java/com/milaboratory/mixcr/basictypes/PipelineConfiguration.java
  11. +46 −0 src/main/java/com/milaboratory/mixcr/basictypes/PipelineConfigurationReader.java
  12. +6 −0 src/main/java/com/milaboratory/mixcr/basictypes/PipelineConfigurationWriter.java
  13. +12 −1 src/main/java/com/milaboratory/mixcr/basictypes/VDJCAlignmentsReader.java
  14. +20 −5 src/main/java/com/milaboratory/mixcr/basictypes/VDJCAlignmentsWriter.java
  15. +6 −4 src/main/java/com/milaboratory/mixcr/basictypes/VDJCAlignmentsWriterI.java
  16. +21 −0 src/main/java/com/milaboratory/mixcr/cli/AbstractActionWithResumeOption.java
  17. +128 −40 src/main/java/com/milaboratory/mixcr/cli/ActionAlign.java
  18. +4 −4 src/main/java/com/milaboratory/mixcr/cli/ActionAlignmentsDiff.java
  19. +105 −42 src/main/java/com/milaboratory/mixcr/cli/ActionAssemble.java
  20. +2 −1 src/main/java/com/milaboratory/mixcr/cli/ActionAssembleContigs.java
  21. +82 −26 src/main/java/com/milaboratory/mixcr/cli/ActionAssemblePartialAlignments.java
  22. +6 −6 src/main/java/com/milaboratory/mixcr/cli/ActionClonesDiff.java
  23. +8 −9 src/main/java/com/milaboratory/mixcr/cli/ActionExportAlignmentsPretty.java
  24. +2 −2 src/main/java/com/milaboratory/mixcr/cli/ActionExportCloneReads.java
  25. +11 −10 src/main/java/com/milaboratory/mixcr/cli/ActionExportClones.java
  26. +5 −5 src/main/java/com/milaboratory/mixcr/cli/ActionExportParameters.java
  27. +82 −15 src/main/java/com/milaboratory/mixcr/cli/ActionExtend.java
  28. +73 −19 src/main/java/com/milaboratory/mixcr/cli/ActionFilterAlignments.java
  29. +2 −2 src/main/java/com/milaboratory/mixcr/cli/ActionInfo.java
  30. +61 −15 src/main/java/com/milaboratory/mixcr/cli/ActionMergeAlignments.java
  31. +78 −0 src/main/java/com/milaboratory/mixcr/cli/ActionParametersWithResumeOption.java
  32. +91 −0 src/main/java/com/milaboratory/mixcr/cli/ActionPipelineInfo.java
  33. +52 −8 src/main/java/com/milaboratory/mixcr/cli/ActionSlice.java
  34. +34 −8 src/main/java/com/milaboratory/mixcr/cli/ActionSortAlignments.java
  35. +7 −13 src/main/java/com/milaboratory/mixcr/cli/Main.java
  36. +359 −0 src/main/java/com/milaboratory/mixcr/cli/UberAction.java
  37. +1 −1 src/main/java/com/milaboratory/mixcr/partialassembler/PartialAlignmentsAssembler.java
  38. +24 −0 src/main/java/com/milaboratory/mixcr/partialassembler/PartialAlignmentsAssemblerParameters.java
  39. +1 −1 src/main/java/com/milaboratory/mixcr/util/RunMiXCR.java
  40. +5 −6 src/test/java/com/milaboratory/mixcr/assembler/CloneAssemblerRunnerTest.java
  41. +2 −2 src/test/java/com/milaboratory/mixcr/basictypes/ClnAReaderTest.java
  42. +1 −1 src/test/java/com/milaboratory/mixcr/basictypes/IOTest.java
  43. +1 −1 src/test/java/com/milaboratory/mixcr/basictypes/RandomAccessVDJCAReaderTest.java
  44. +61 −0 src/test/java/com/milaboratory/mixcr/cli/UberActionTest.java
  45. +5 −3 src/test/java/com/milaboratory/mixcr/util/RunMiXCRTest.java
  46. +1 −1 src/test/java/com/milaboratory/mixcr/vdjaligners/VDJCAlignerSTest.java
View
@@ -76,7 +76,7 @@
<dependency>
<groupId>com.beust</groupId>
<artifactId>jcommander</artifactId>
<version>1.48</version>
<version>1.72</version>
<optional>true</optional>
</dependency>
@@ -0,0 +1,32 @@
package com.milaboratory.mixcr.basictypes;
import com.fasterxml.jackson.annotation.JsonAutoDetect;
import com.fasterxml.jackson.annotation.JsonSubTypes;
import com.fasterxml.jackson.annotation.JsonTypeInfo;
import com.milaboratory.mixcr.cli.*;
import com.milaboratory.primitivio.annotations.Serializable;
/**
* A data structure which holds the whole set of parameters which affect specific MiXCR action.
*
* <p>The Jackson annotations below make implementations (de)serializable from their fields (all getters are
* ignored) and store the concrete implementation as a logical name in the {@code "type"} JSON property. The
* known implementations and their logical names are registered in {@link JsonSubTypes}.
*/
@JsonAutoDetect(
fieldVisibility = JsonAutoDetect.Visibility.ANY,
isGetterVisibility = JsonAutoDetect.Visibility.NONE,
getterVisibility = JsonAutoDetect.Visibility.NONE)
@JsonTypeInfo(
use = JsonTypeInfo.Id.NAME,
include = JsonTypeInfo.As.PROPERTY,
property = "type")
// One entry per action configuration; the name is the value written to the "type" property.
@JsonSubTypes({
@JsonSubTypes.Type(value = ActionAlign.AlignConfiguration.class, name = "align-configuration"),
@JsonSubTypes.Type(value = ActionAssemble.AssembleConfiguration.class, name = "assemble-configuration"),
@JsonSubTypes.Type(value = ActionAssemblePartialAlignments.AssemblePartialConfiguration.class, name = "assemble-partial-configuration"),
@JsonSubTypes.Type(value = ActionExtend.ExtendConfiguration.class, name = "extend-configuration"),
@JsonSubTypes.Type(value = ActionMergeAlignments.MergeConfiguration.class, name = "merge-configuration"),
@JsonSubTypes.Type(value = ActionFilterAlignments.FilterConfiguration.class, name = "filter-configuration"),
@JsonSubTypes.Type(value = ActionSortAlignments.SortConfiguration.class, name = "sort-configuration"),
@JsonSubTypes.Type(value = ActionSlice.SliceConfiguration.class, name = "slice-configuration")})
// NOTE(review): presumably tells primitivio to store instances as JSON inside binary files - confirm against milib.
@Serializable(asJson = true)
public interface ActionConfiguration {
/**
* Returns the name of the action described by this configuration.
*
* <p>NOTE(review): presumably the CLI action name (e.g. "align") - confirm against the implementations.
*
* @return action name
*/
String actionName();
}
@@ -30,6 +30,7 @@
import cc.redberry.pipe.OutputPort;
import com.milaboratory.mixcr.assembler.CloneAssemblerParameters;
import com.milaboratory.mixcr.basictypes.ClnsReader.GT2GFAdapter;
import com.milaboratory.mixcr.vdjaligners.VDJCAlignerParameters;
import com.milaboratory.primitivio.PipeDataInputReader;
import com.milaboratory.primitivio.PrimitivI;
@@ -56,7 +57,9 @@
/**
* Reader of CLNA file format.
*/
public final class ClnAReader implements AutoCloseable {
public final class ClnAReader implements
PipelineConfigurationReader,
AutoCloseable {
public static final int DEFAULT_CHUNK_SIZE = 262144;
final int chunkSize;
/**
@@ -80,9 +83,10 @@
// Read from file header
final PipelineConfiguration configuration;
final VDJCAlignerParameters alignerParameters;
final CloneAssemblerParameters assemblerParameters;
final CloneSetIO.GT2GFAdapter alignedFeatures;
final GT2GFAdapter alignedFeatures;
final List<VDJCGene> genes;
final int numberOfClones;
@@ -145,9 +149,10 @@ public ClnAReader(Path path, VDJCLibraryRegistry libraryRegistry, int chunkSize)
input = new PrimitivI(new InputDataStream(ClnAWriter.MAGIC_LENGTH + 4, firstClonePosition));
this.versionInfo = input.readUTF();
this.configuration = input.readObject(PipelineConfiguration.class);
this.alignerParameters = input.readObject(VDJCAlignerParameters.class);
this.assemblerParameters = input.readObject(CloneAssemblerParameters.class);
this.alignedFeatures = new CloneSetIO.GT2GFAdapter(IO.readGF2GTMap(input));
this.alignedFeatures = new GT2GFAdapter(IO.readGF2GTMap(input));
this.genes = IOUtil.readGeneReferences(input, libraryRegistry);
}
@@ -159,6 +164,11 @@ public ClnAReader(String path, VDJCLibraryRegistry libraryRegistry) throws IOExc
this(Paths.get(path), libraryRegistry, DEFAULT_CHUNK_SIZE);
}
@Override
public PipelineConfiguration getPipelineConfiguration() {
return configuration;
}
/**
* Aligner parameters
*/
@@ -46,18 +46,17 @@
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.Optional;
import java.util.*;
/**
* Writer for CLNA file format.
*
* Usage: 1. Constructor (opens the output file, buffered) 2. writeClones() 3. sortAlignments() 4.
* writeAlignmentsAndIndex() 5. close()
*/
public final class ClnAWriter implements AutoCloseable, CanReportProgressAndStage {
public final class ClnAWriter implements PipelineConfigurationWriter,
AutoCloseable,
CanReportProgressAndStage {
static final String MAGIC_V2 = "MiXCR.CLNA.V02";
static final String MAGIC = MAGIC_V2;
static final int MAGIC_LENGTH = MAGIC.length();
@@ -71,6 +70,8 @@
*/
private final CountingOutputStream outputStream;
private final PrimitivO output;
private final PipelineConfiguration configuration;
/**
* Counter OP used to report progress during stage 2
*/
@@ -80,11 +81,12 @@
private volatile long numberOfAlignments = -1, numberOfAlignmentsWritten = 0;
private volatile boolean clonesBlockFinished = false, finished = false;
public ClnAWriter(String fileName) throws IOException {
this(new File(fileName));
public ClnAWriter(PipelineConfiguration configuration, String fileName) throws IOException {
this(configuration, new File(fileName));
}
public ClnAWriter(File file) throws IOException {
public ClnAWriter(PipelineConfiguration configuration, File file) throws IOException {
this.configuration = configuration;
this.tempFile = new File(file.getAbsolutePath() + ".presorted");
this.outputStream = new CountingOutputStream(new BufferedOutputStream(
new FileOutputStream(file), 131072));
@@ -109,7 +111,7 @@ public synchronized void writeClones(CloneSet cloneSet) {
this.usedGenes = cloneSet.getUsedGenes();
// Saving features to align
this.featureToAlign = new CloneSetIO.GT2GFAdapter(cloneSet.alignedFeatures);
this.featureToAlign = new ClnsReader.GT2GFAdapter(cloneSet.alignedFeatures);
// Writing number of clones ahead of any other content to make it available
// in known file position (MAGIC_LENGTH)
@@ -119,6 +121,9 @@ public synchronized void writeClones(CloneSet cloneSet) {
output.writeUTF(MiXCRVersionInfo.get()
.getVersionString(MiXCRVersionInfo.OutputType.ToFile));
// Writing full pipeline configuration
output.writeObject(configuration);
// Writing aligner parameters
output.writeObject(cloneSet.alignmentParameters);
@@ -0,0 +1,129 @@
package com.milaboratory.mixcr.basictypes;
import com.milaboratory.mixcr.assembler.CloneAssemblerParameters;
import com.milaboratory.mixcr.vdjaligners.VDJCAlignerParameters;
import com.milaboratory.primitivio.PrimitivI;
import io.repseq.core.*;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.EnumMap;
import java.util.List;
import static com.milaboratory.mixcr.basictypes.ClnsWriter.MAGIC;
import static com.milaboratory.mixcr.basictypes.ClnsWriter.MAGIC_LENGTH;
/**
 * Reader of the CLNS file format written by {@link ClnsWriter}.
 *
 * <p>Nothing is read from the underlying input until one of the getters is called for the first time; that
 * call reads the magic bytes, the header and all clones, and the results are cached for subsequent calls.
 */
public class ClnsReader implements PipelineConfigurationReader,
        AutoCloseable {
    private final PrimitivI input;
    private final VDJCLibraryRegistry libraryRegistry;

    /**
     * @param input           source of the serialized CLNS content, positioned at the magic bytes
     * @param libraryRegistry registry used to resolve the gene references stored in the file
     */
    public ClnsReader(PrimitivI input, VDJCLibraryRegistry libraryRegistry) {
        this.input = input;
        this.libraryRegistry = libraryRegistry;
    }

    /**
     * @param inputStream     stream with CLNS content
     * @param libraryRegistry registry used to resolve the gene references stored in the file
     */
    public ClnsReader(InputStream inputStream, VDJCLibraryRegistry libraryRegistry) {
        this(new PrimitivI(inputStream), libraryRegistry);
    }

    /**
     * @param file            CLNS file
     * @param libraryRegistry registry used to resolve the gene references stored in the file
     * @throws IOException if the input stream for the file cannot be created
     */
    public ClnsReader(File file, VDJCLibraryRegistry libraryRegistry) throws IOException {
        this(IOUtil.createIS(file), libraryRegistry);
    }

    /**
     * @param file            name of the CLNS file
     * @param libraryRegistry registry used to resolve the gene references stored in the file
     * @throws IOException if the input stream for the file cannot be created
     */
    public ClnsReader(String file, VDJCLibraryRegistry libraryRegistry) throws IOException {
        this(new File(file), libraryRegistry);
    }

    // State below is populated once by init(), guarded by the monitor of this reader
    private boolean initialized = false;
    private CloneSet cloneSet = null;
    private PipelineConfiguration pipelineConfiguration = null;
    private VDJCAlignerParameters alignerParameters = null;
    private CloneAssemblerParameters assemblerParameters = null;

    /**
     * Reads the whole file content on the first invocation; does nothing on subsequent invocations.
     *
     * @throws RuntimeException if the magic bytes do not match the format version supported by this reader
     */
    private synchronized void init() {
        if (initialized)
            return;

        // Registering custom serializer
        input.getSerializersManager().registerCustomSerializer(GeneFeature.class, new GeneFeatureSerializer(true));

        byte[] magicBytes = new byte[MAGIC_LENGTH];
        input.readFully(magicBytes);
        // The writer encodes the magic as US-ASCII; decode with the same charset rather than
        // with the platform default, which is not guaranteed to be ASCII-compatible
        String magicString = new String(magicBytes, StandardCharsets.US_ASCII);

        if (!MAGIC.equals(magicString))
            throw new RuntimeException("Unsupported file format; .clns file of version " + magicString +
                    " while you are running MiXCR " + MAGIC);

        // Header; the order of reads mirrors the order of writes in ClnsWriter.write()
        String versionInfo = input.readUTF();
        pipelineConfiguration = input.readObject(PipelineConfiguration.class);
        alignerParameters = input.readObject(VDJCAlignerParameters.class);
        assemblerParameters = input.readObject(CloneAssemblerParameters.class);
        EnumMap<GeneType, GeneFeature> alignedFeatures = IO.readGF2GTMap(input);
        List<VDJCGene> genes = IOUtil.readAndRegisterGeneReferences(input, libraryRegistry, new GT2GFAdapter(alignedFeatures));

        // Clones: count followed by that many serialized Clone objects
        int count = input.readInt();
        List<Clone> clones = new ArrayList<>(count);
        for (int i = 0; i < count; i++)
            clones.add(input.readObject(Clone.class));

        this.cloneSet = new CloneSet(clones, genes, alignedFeatures, alignerParameters, assemblerParameters);
        cloneSet.versionInfo = versionInfo;

        initialized = true;
    }

    /**
     * @return clone set stored in the file (read on first access)
     */
    public CloneSet getCloneSet() {
        init();
        return cloneSet;
    }

    /**
     * @return pipeline configuration stored in the file header (read on first access)
     */
    @Override
    public PipelineConfiguration getPipelineConfiguration() {
        init();
        return pipelineConfiguration;
    }

    /**
     * @return aligner parameters stored in the file header (read on first access)
     */
    public VDJCAlignerParameters getAlignerParameters() {
        init();
        return alignerParameters;
    }

    /**
     * @return assembler parameters stored in the file header (read on first access)
     */
    public CloneAssemblerParameters getAssemblerParameters() {
        init();
        return assemblerParameters;
    }

    /**
     * Closes the underlying input.
     */
    @Override
    public void close() {
        input.close();
    }

    /**
     * Exposes a gene type to gene feature map through the {@link HasFeatureToAlign} interface.
     */
    public static class GT2GFAdapter implements HasFeatureToAlign {
        public final EnumMap<GeneType, GeneFeature> map;

        /**
         * @param map gene type to aligned gene feature mapping; stored as is (not copied)
         */
        public GT2GFAdapter(EnumMap<GeneType, GeneFeature> map) {
            this.map = map;
        }

        /**
         * @return feature mapped to the given gene type, or {@code null} if the map has no entry for it
         */
        @Override
        public GeneFeature getFeatureToAlign(GeneType geneType) {
            return map.get(geneType);
        }
    }
}
@@ -0,0 +1,96 @@
package com.milaboratory.mixcr.basictypes;
import com.milaboratory.mixcr.util.MiXCRVersionInfo;
import com.milaboratory.primitivio.PrimitivO;
import com.milaboratory.util.CanReportProgressAndStage;
import io.repseq.core.GeneFeature;
import io.repseq.core.GeneFeatureSerializer;
import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
/**
 * Writer of the CLNS file format: magic bytes, MiXCR version string, pipeline configuration, aligner and
 * assembler parameters, aligned features, gene references and finally the clones themselves.
 *
 * <p>Usage: construct, call {@link #write()}, then {@link #close()}. Progress of {@link #write()} can be
 * observed through {@link #getProgress()} and {@link #isFinished()}.
 */
public class ClnsWriter implements PipelineConfigurationWriter,
        CanReportProgressAndStage,
        Closeable {
    static final String MAGIC_V7 = "MiXCR.CLNS.V07";
    static final String MAGIC = MAGIC_V7;
    // Kept as a literal so it stays a compile-time constant; must equal MAGIC.length()
    static final int MAGIC_LENGTH = 14;
    static final byte[] MAGIC_BYTES = MAGIC.getBytes(StandardCharsets.US_ASCII);

    final String stage = "Writing clones";
    final PrimitivO output;
    final CloneSet cloneSet;
    final int size;
    final PipelineConfiguration configuration;

    /** Number of clones written so far; volatile so progress can be polled from another thread. */
    private volatile int current;

    /**
     * @param configuration pipeline configuration to store in the file header
     * @param cloneSet      clones to write
     * @param fileName      name of the output file
     * @throws IOException if the output stream for the file cannot be created
     */
    public ClnsWriter(PipelineConfiguration configuration, CloneSet cloneSet, String fileName) throws IOException {
        this(configuration, cloneSet, new File(fileName));
    }

    /**
     * @param configuration pipeline configuration to store in the file header
     * @param cloneSet      clones to write
     * @param file          output file
     * @throws IOException if the output stream for the file cannot be created
     */
    public ClnsWriter(PipelineConfiguration configuration, CloneSet cloneSet, File file) throws IOException {
        this(configuration, cloneSet, IOUtil.createOS(file));
    }

    /**
     * @param configuration pipeline configuration to store in the file header
     * @param cloneSet      clones to write
     * @param outputStream  destination stream
     */
    public ClnsWriter(PipelineConfiguration configuration, CloneSet cloneSet, OutputStream outputStream) {
        this.output = new PrimitivO(outputStream);
        this.configuration = configuration;
        this.cloneSet = cloneSet;
        this.size = cloneSet.size();
    }

    @Override
    public String getStage() {
        return stage;
    }

    /**
     * @return fraction of clones already written, in the range [0, 1]
     */
    @Override
    public double getProgress() {
        // An empty clone set would otherwise yield 0.0 / 0 = NaN, although isFinished() is already true
        if (size == 0)
            return 1.0;
        return (1.0 * current) / size;
    }

    @Override
    public boolean isFinished() {
        return current == size;
    }

    /**
     * Writes the header and all clones to the output. Does not close the output.
     */
    public void write() {
        // Registering custom serializer
        output.getSerializersManager().registerCustomSerializer(GeneFeature.class, new GeneFeatureSerializer(true));

        // Writing magic bytes
        output.write(MAGIC_BYTES);

        // Writing version information
        output.writeUTF(
                MiXCRVersionInfo.get().getVersionString(
                        MiXCRVersionInfo.OutputType.ToFile));

        // Writing analysis meta-information
        output.writeObject(configuration);
        output.writeObject(cloneSet.alignmentParameters);
        output.writeObject(cloneSet.assemblerParameters);

        IO.writeGT2GFMap(output, cloneSet.alignedFeatures);

        IOUtil.writeAndRegisterGeneReferences(output, cloneSet.getUsedGenes(), new ClnsReader.GT2GFAdapter(cloneSet.alignedFeatures));

        // Clone count first, then the clones; `current` drives the progress reporting
        output.writeInt(cloneSet.getClones().size());

        for (Clone clone : cloneSet) {
            output.writeObject(clone);
            ++current;
        }
    }

    /**
     * Closes the underlying output.
     */
    @Override
    public void close() {
        output.close();
    }
}
Oops, something went wrong.

0 comments on commit e18cd32

Please sign in to comment.