Skip to content

Commit

Permalink
Related to #7
Browse files Browse the repository at this point in the history
  • Loading branch information
PoslavskySV committed Sep 28, 2015
1 parent 73a71e5 commit 2934621
Show file tree
Hide file tree
Showing 9 changed files with 227 additions and 120 deletions.
1 change: 1 addition & 0 deletions CHANGELOG_CURRENT
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
New feature: optional short column names in `export...` action to simplify further data analysis using data table processing libraries like Pandas or R/DataFrames. (`-s` / `--no-spaces` in `exportAlignments` and `exportClones`)
New export fields: `-defaultAnchorPoints` outputs positions of default anchor points in aligned reads or clonal sequence (this column is added to default output format), `-positionOf` outputs position of specified anchor point, `-lengthOf` outputs length of specified gene feature
Added `V5UTRBeginTrimmed` anchor point, `V5UTR` gene feature renamed to `V5UTRGermline`, trimmed `V5UTR` gene feature added
Added `--filter-out-of-frames` and `--filter-stops` options in `exportClones`
minor: some column names in output tab-delimited files slightly changed
minor: NPE in exportAlignmentsPretty fixed
minor: New anchor points added to exportAlignmentsPretty output
6 changes: 5 additions & 1 deletion src/main/java/com/milaboratory/mixcr/basictypes/Clone.java
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,11 @@ public void setParentCloneSet(CloneSet set) {
public double getFraction() {
if (parent == null)
throw new NullPointerException("Parent not set yet.");
return 1.0 * count / parent.getTotalCount();
return getFraction(parent.getTotalCount());
}

/**
 * Computes the share of this clone relative to an externally supplied total.
 *
 * @param totalCount total count that this clone's {@code count} is divided by
 * @return {@code count / totalCount}, evaluated in floating point arithmetic
 */
public double getFraction(long totalCount) {
    // Widen first so the division is never performed on integers.
    final double cloneCount = count;
    return cloneCount / totalCount;
}

public GeneFeature[] getAssemblingFeatures() {
Expand Down
16 changes: 16 additions & 0 deletions src/main/java/com/milaboratory/mixcr/basictypes/CloneSet.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
*/
package com.milaboratory.mixcr.basictypes;

import cc.redberry.primitives.Filter;
import com.milaboratory.mixcr.reference.Allele;
import com.milaboratory.mixcr.reference.AlleleId;
import com.milaboratory.mixcr.reference.GeneFeature;
Expand Down Expand Up @@ -120,4 +121,19 @@ public long getTotalCount() {
public Iterator<Clone> iterator() {
// Iteration is delegated directly to the iterator of the `clones` field.
return clones.iterator();
}

/**
 * Creates a new {@link CloneSet} that contains only those clones of {@code in} accepted by
 * {@code filter}.
 *
 * <p>WARNING: the input set {@code in} is destroyed by this call. Every accepted clone has its
 * {@code parent} reference cleared and is passed to the returned set, so {@code in} must not be
 * used afterwards.
 *
 * @param in     source clone set (consumed by this method)
 * @param filter predicate deciding which clones are kept
 * @return a new clone set built from the accepted clones together with the used alleles, aligned
 *         features and assembling features of {@code in}
 */
public static CloneSet transform(CloneSet in, Filter<Clone> filter) {
    List<Clone> accepted = new ArrayList<>(in.size());
    for (int idx = 0; idx < in.size(); ++idx) {
        Clone candidate = in.get(idx);
        if (!filter.accept(candidate)) {
            continue;
        }
        // Detach the clone from the old set before it is handed to the new one.
        candidate.parent = null;
        accepted.add(candidate);
    }
    return new CloneSet(accepted, in.usedAlleles, in.alignedFeatures, in.assemblingFeatures);
}
}
2 changes: 1 addition & 1 deletion src/main/java/com/milaboratory/mixcr/cli/ActionExport.java
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ protected ActionExport(ActionExportParameters parameters) {

@Override
public void go(ActionHelper helper) throws Exception {
if (parameters.fields) {
if (parameters.listFields) {
helper.getDefaultPrintStream().print(parameters.printFieldsHelp());
return;
}
Expand Down
31 changes: 29 additions & 2 deletions src/main/java/com/milaboratory/mixcr/cli/ActionExportClones.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,17 +28,19 @@
*/
package com.milaboratory.mixcr.cli;

import cc.redberry.primitives.Filter;
import com.milaboratory.core.sequence.AminoAcidSequence;
import com.milaboratory.core.sequence.NSequenceWithQuality;
import com.milaboratory.mixcr.basictypes.Clone;
import com.milaboratory.mixcr.basictypes.CloneSet;
import com.milaboratory.mixcr.basictypes.CloneSetIO;
import com.milaboratory.mixcr.basictypes.IOUtil;
import com.milaboratory.mixcr.export.InfoWriter;
import com.milaboratory.mixcr.reference.GeneFeature;
import com.milaboratory.mixcr.reference.LociLibraryManager;
import com.milaboratory.util.CanReportProgressAndStage;
import com.milaboratory.util.SmartProgressReporter;

import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.InputStream;
import java.util.List;

Expand All @@ -52,6 +54,8 @@ public void go0() throws Exception {
try (InputStream inputStream = IOUtil.createIS(parameters.inputFile);
InfoWriter<Clone> writer = new InfoWriter<>(parameters.outputFile)) {
CloneSet set = CloneSetIO.read(inputStream, LociLibraryManager.getDefault());
if (parameters.filterOutOfFrames || parameters.filterStopCodons)
set = CloneSet.transform(set, new CFilter(parameters.filterOutOfFrames, parameters.filterStopCodons));
writer.attachInfoProviders((List) parameters.exporters);
ExportClones exportClones = new ExportClones(set, writer);
SmartProgressReporter.startProgressReport(exportClones);
Expand All @@ -65,6 +69,29 @@ public String command() {
return "exportClones";
}

/**
 * Clone filter that can reject clones with an out-of-frame CDR3 and/or clones whose target
 * sequences translate to something containing stops. Each check is applied only when the
 * corresponding flag is set.
 */
private static final class CFilter implements Filter<Clone> {
    final boolean filterOutOfFrames;
    final boolean filterStopCodons;

    public CFilter(boolean filterOutOfFrames, boolean filterStopCodons) {
        this.filterOutOfFrames = filterOutOfFrames;
        this.filterStopCodons = filterStopCodons;
    }

    /**
     * @param clone clone to test
     * @return {@code true} if the clone passes every enabled check
     */
    @Override
    public boolean accept(Clone clone) {
        // The frame check runs first, exactly as the stop check depends on nothing from it.
        if (filterOutOfFrames && !hasInFrameCdr3(clone)) {
            return false;
        }
        return !(filterStopCodons && containsStops(clone));
    }

    /** Returns {@code true} when the clone has a CDR3 whose length is a multiple of three. */
    private static boolean hasInFrameCdr3(Clone clone) {
        NSequenceWithQuality cdr3 = clone.getFeature(GeneFeature.CDR3);
        return cdr3 != null && cdr3.size() % 3 == 0;
    }

    /** Returns {@code true} when the from-center translation of any target reports stops. */
    private static boolean containsStops(Clone clone) {
        for (int target = 0; target < clone.numberOfTargets(); target++) {
            AminoAcidSequence translated =
                    AminoAcidSequence.translateFromCenter(clone.getTarget(target).getSequence());
            if (translated.containStops()) {
                return true;
            }
        }
        return false;
    }
}

private static final class ExportClones implements CanReportProgressAndStage {
final CloneSet clones;
final InfoWriter<Clone> writer;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ public ActionExportParameters(Class clazz, String defaultPreset) {
description[0].add("-h, --help");
description[1].add("print this help message");

description[0].add(FIELDS_SHORT + ", " + FIELDS_LONG);
description[0].add(LIST_FIELDS_SHORT + ", " + LIST_FIELDS_LONG);
description[1].add("print available fields to export");

description[0].add(PRESET_SHORT + ", " + PRESET_LONG);
Expand All @@ -77,20 +77,28 @@ public ActionExportParameters(Class clazz, String defaultPreset) {
description[1].add("output short versions of column headers which facilitates analysis with Pandas, " +
"R/DataFrames or other data tables processing library.");

if (clazz.equals(Clone.class)) {
description[0].add(FILTER_OUT_OF_FRAMES);
description[1].add("exclude out of frames (fractions will be recalculated)");

description[0].add(FILTER_STOP_CODONS);
description[1].add("exclude sequences containing stop codons (fractions will be recalculated)");
}

this.helpString =
"Usage: export(Type) [options] input_file output_file\n" +
"Options:\n" +
Util.printTwoColumns(4, description[0], description[1], 20, 50, 5, "\n") + "\n" +
Util.printTwoColumns(4, description[0], description[1], 23, 50, 5, "\n") + "\n" +
"Examples:\n" +
" exportClones -p all -nFeature CDR1 input.clns output.txt\n" +
" exportAlignments -pf params.txt -nFeature CDR1 -dAlignments input.clns output.txt\n";
description = FieldExtractors.getDescription(clazz);
this.fieldsHelpString = "Available export fields:\n" + Util.printTwoColumns(
description[0], description[1], 20, 50, 5, "\n");

description[0], description[1], 23, 50, 5, "\n");
}

public Boolean fields = false;
public Boolean listFields = false;
public boolean filterOutOfFrames = false, filterStopCodons = false;
public String inputFile;
public String outputFile;
public ArrayList<FieldExtractor> exporters;
Expand All @@ -105,16 +113,23 @@ public String printHelp() {

public final void parseParameters(String[] args) throws ParameterException {
trim(args);
for (String arg : args) {
if (arg.equals(FIELDS_SHORT) || arg.equals(FIELDS_LONG)) {
fields = true;
return;
}
if (arg.equals("-h") || arg.equals("--help")) {
help = true;
return;
for (String arg : args)
switch (arg) {
case LIST_FIELDS_SHORT:
case LIST_FIELDS_LONG:
listFields = true;
return;
case "-h":
case "--help":
help = true;
return;
case FILTER_OUT_OF_FRAMES:
filterOutOfFrames = true;
break;
case FILTER_STOP_CODONS:
filterStopCodons = true;
break;
}
}

if (args.length < 2)
throw new ParameterException("No output file specified.");
Expand All @@ -127,7 +142,10 @@ public final void parseParameters(String[] args) throws ParameterException {
break;
}

if (args.length == 2 || (outputMode == OutputMode.ScriptingFriendly && args.length == 3))
int i = ((outputMode == OutputMode.ScriptingFriendly) ? 1 : 0)
+ (filterOutOfFrames ? 1 : 0)
+ (filterStopCodons ? 1 : 0);
if (args.length - i == 2)
exporters = getPresetParameters(outputMode, clazz, defaultPreset);
else
exporters = parseParametersString(outputMode, clazz, args, 0, args.length - 2);
Expand All @@ -143,6 +161,9 @@ public static ArrayList<FieldExtractor> parseParametersString(OutputMode outputM
String arg = args[i];
if (isParsingFriendlyFlag(arg))
continue;
//skip options
if (arg.equals(FILTER_OUT_OF_FRAMES) || arg.equals(FILTER_STOP_CODONS))
continue;
if (arg.charAt(0) == '-') {
if (!exporter.isEmpty()) {
FieldExtractor exp = FieldExtractors.parse(outputMode,
Expand Down Expand Up @@ -258,10 +279,12 @@ private static void trim(String[] args) {
PRESET_LONG = "--preset",
PRESET_FILE_SHORT = "-pf",
PRESET_FILE_LONG = "--presetFile",
FIELDS_SHORT = "-l",
FIELDS_LONG = "--listFields",
LIST_FIELDS_SHORT = "-l",
LIST_FIELDS_LONG = "--listFields",
PARSING_LONG = "--no-spaces",
PARSING_SHORT = "-s";
PARSING_SHORT = "-s",
FILTER_OUT_OF_FRAMES = "--filter-out-of-frames",
FILTER_STOP_CODONS = "--filter-stops";

public static boolean isPresetParameter(String string) {
return string.equals(PRESET_SHORT) || string.equals(PRESET_LONG);
Expand Down
17 changes: 12 additions & 5 deletions src/main/java/com/milaboratory/mixcr/cli/Util.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
package com.milaboratory.mixcr.cli;

import com.milaboratory.mixcr.reference.Locus;
import gnu.trove.map.hash.TIntObjectHashMap;

import java.io.File;
import java.io.FileOutputStream;
Expand Down Expand Up @@ -156,11 +157,17 @@ public static String printTwoColumns(int offset, List<String> left, List<String>
return sb.toString();
}

public static String spacer(int sep) {
StringBuilder sb = new StringBuilder(sep);
for (int i = 0; i < sep; ++i)
sb.append(" ");
return sb.toString();
// Cache of already built space-only strings, keyed by their length. It is never reassigned
// (hence final) and, in the visible code, is only touched from the synchronized spacer(int).
private static final TIntObjectHashMap<String> spacesCache = new TIntObjectHashMap<>();

/**
 * Returns a string consisting of {@code sep} space characters.
 *
 * <p>Results are cached per length, so repeated calls with the same {@code sep} return the
 * same {@code String} instance. The method is synchronized because the cache is shared
 * static state.
 *
 * @param sep number of spaces; zero yields an empty string
 * @return string of {@code sep} spaces
 * @throws NegativeArraySizeException if {@code sep} is negative (raised by the
 *                                    {@link StringBuilder} capacity constructor)
 */
public static synchronized String spacer(int sep) {
    String cached = spacesCache.get(sep);
    if (cached == null) {
        StringBuilder sb = new StringBuilder(sep);
        for (int i = 0; i < sep; ++i)
            sb.append(' ');
        cached = sb.toString();
        spacesCache.put(sep, cached);
    }
    return cached;
}

private static int lineBreakPos(String str, int width) {
Expand Down

0 comments on commit 2934621

Please sign in to comment.