Skip to content

Commit

Permalink
--no-spaces option for export actions.
Browse files Browse the repository at this point in the history
Minor changes to column names in export.
  • Loading branch information
dbolotin committed Sep 23, 2015
1 parent 92e6975 commit 134c3e9
Show file tree
Hide file tree
Showing 8 changed files with 183 additions and 93 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG_CURRENT
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
New feature: optional short column names in `export...` action to simplify further data analysis using data table processing libraries like Pandas or R/DataFrames. (`-s` / `--no-spaces` in `exportAlignments` and `exportClones`)
minor: some column names in output tab-delimited files slightly changed
minor: NPE in exportAlignmentsPretty fixed
3 changes: 3 additions & 0 deletions doc/export.rst
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,9 @@ The list of command line parameters for both ``exportAlignments`` and
+---------------------+-------------------------------------------------------------------+
| -pf, --presetFile | load file with a list of fields to export |
+---------------------+-------------------------------------------------------------------+
| -s, --no-spaces | output short versions of column headers which facilitates analysis|
| | with Pandas, R/DataFrames or other data tables processing library |
+---------------------+-------------------------------------------------------------------+

Available fields
----------------
Expand Down
114 changes: 70 additions & 44 deletions src/main/java/com/milaboratory/mixcr/cli/ActionExportParameters.java
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import com.milaboratory.mixcr.basictypes.VDJCAlignments;
import com.milaboratory.mixcr.export.FieldExtractor;
import com.milaboratory.mixcr.export.FieldExtractors;
import com.milaboratory.mixcr.export.OutputMode;

import java.io.*;
import java.util.ArrayList;
Expand All @@ -59,14 +60,23 @@ public ActionExportParameters(Class clazz, String defaultPreset) {
this.defaultPreset = defaultPreset;
this.clazz = clazz;
ArrayList<String>[] description = new ArrayList[]{new ArrayList(), new ArrayList()};

description[0].add("-h, --help");
description[0].add(FIELDS_SHORT + ", " + FIELDS_LONG);
description[0].add(PRESET_SHORT + ", " + PRESET_LONG);
description[0].add(PRESET_FILE_SHORT + ", " + PRESET_FILE_LONG);
description[1].add("print this help message");

description[0].add(FIELDS_SHORT + ", " + FIELDS_LONG);
description[1].add("print available fields to export");

description[0].add(PRESET_SHORT + ", " + PRESET_LONG);
description[1].add("preset parameters (full, min)");

description[0].add(PRESET_FILE_SHORT + ", " + PRESET_FILE_LONG);
description[1].add("file with preset parameters");

description[0].add(PARSING_SHORT + ", " + PARSING_LONG);
description[1].add("output short versions of column headers which facilitates analysis with Pandas, " +
"R/DataFrames or other data tables processing library.");

this.helpString =
"Usage: export(Type) [options] input_file output_file\n" +
"Options:\n" +
Expand Down Expand Up @@ -105,27 +115,36 @@ public final void parseParameters(String[] args) throws ParameterException {
return;
}
}

if (args.length < 2)
throw new ParameterException("No output file specified.");

if (args.length == 2)
exporters = getPresetParameters(clazz, defaultPreset);
OutputMode outputMode = OutputMode.HumanFriendly;

for (String arg : args)
if (isParsingFriendly(arg)) {
outputMode = OutputMode.ScriptingFriendly;
break;
}

if (args.length == 2 || (outputMode == OutputMode.ScriptingFriendly && args.length == 3))
exporters = getPresetParameters(outputMode, clazz, defaultPreset);
else
exporters = parseParametersString(clazz, args, 0, args.length - 2);
exporters = parseParametersString(outputMode, clazz, args, 0, args.length - 2);

inputFile = args[args.length - 2];
outputFile = args[args.length - 1];
}


public static ArrayList<FieldExtractor> parseParametersString(Class clazz, String[] args, int from, int to) {
public static ArrayList<FieldExtractor> parseParametersString(OutputMode outputMode, Class clazz, String[] args, int from, int to) {
ArrayList<FieldExtractor> exporters = new ArrayList<>();
ArrayList<String> exporter = new ArrayList<>();
for (int i = from; i < to; ++i) {
String arg = args[i];
if (arg.charAt(0) == '-') {
if (!exporter.isEmpty()) {
FieldExtractor exp = FieldExtractors.parse(clazz, exporter.toArray(new String[exporter.size()]));
FieldExtractor exp = FieldExtractors.parse(outputMode,
clazz, exporter.toArray(new String[exporter.size()]));
if (exp == null)
throw new ParameterException("Unknown export field: " + exporter);
exporters.add(exp);
Expand All @@ -134,12 +153,12 @@ public static ArrayList<FieldExtractor> parseParametersString(Class clazz, Strin
if (isPresetParameter(arg)) {
if (i == to - 1 || args[i + 1].charAt(0) == '-')
throw new ParameterException("Preset value not specified.");
exporters.addAll(getPresetParameters(clazz, args[++i]));
exporters.addAll(getPresetParameters(outputMode, clazz, args[++i]));
} else if (isPresetFileParameter(arg)) {
if (i == to - 1 || args[i + 1].charAt(0) == '-')
throw new ParameterException("Preset file name not specified.");
try {
exporters.addAll(readFromFile(clazz, args[++i]));
exporters.addAll(readFromFile(outputMode, clazz, args[++i]));
} catch (IOException e) {
throw new ParameterException(e.getMessage());
}
Expand All @@ -149,15 +168,16 @@ public static ArrayList<FieldExtractor> parseParametersString(Class clazz, Strin
exporter.add(arg);
}
if (!exporter.isEmpty()) {
FieldExtractor exp = FieldExtractors.parse(clazz, exporter.toArray(new String[exporter.size()]));
FieldExtractor exp = FieldExtractors.parse(outputMode,
clazz, exporter.toArray(new String[exporter.size()]));
if (exp == null)
throw new ParameterException("Unknown export field: " + exporter);
exporters.add(exp);
}
return exporters;
}

static List<FieldExtractor> readFromFile(Class clazz, String fileName) throws IOException {
static List<FieldExtractor> readFromFile(OutputMode outputMode, Class clazz, String fileName) throws IOException {
File file = new File(fileName);
BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file)));
List<FieldExtractor> exporters = new ArrayList<>();
Expand All @@ -174,57 +194,57 @@ static List<FieldExtractor> readFromFile(Class clazz, String fileName) throws IO
s = line.replace(" ", " ");
} while (s.length() != line.length());
String[] exporter = line.split(" ");
exporters.add(FieldExtractors.parse(clazz, exporter));
exporters.add(FieldExtractors.parse(outputMode, clazz, exporter));
}
return exporters;
}

static ArrayList<FieldExtractor> parsePresetString(Class clazz, String string) {
static ArrayList<FieldExtractor> parsePresetString(OutputMode outputMode, Class clazz, String string) {
string = string.replace("\n", " ").trim();
String s = string;
do {
string = s;
s = string.replace(" ", " ");
} while (s.length() != string.length());
String[] args = string.split(" ");
return parseParametersString(clazz, args, 0, args.length);
return parseParametersString(outputMode, clazz, args, 0, args.length);
}

private static final Map<Class, Map<String, ArrayList<FieldExtractor>>> preset;
private static final Map<Class, Map<String, String>> preset;

static {
preset = new HashMap<>();
Map<String, ArrayList<FieldExtractor>> clones = new HashMap<>();
clones.put("min", parsePresetString(Clone.class,
"-count -vHit -dHit -jHit -cHit -nfeature CDR3"));
clones.put("full", parsePresetString(Clone.class,
"-count -fraction -sequence -quality " +
"-vHits -dHits -jHits -cHits " +
"-vAlignments -dAlignments -jAlignments -cAlignments " +
"-nFeature FR1 -minFeatureQuality FR1 -nFeature CDR1 -minFeatureQuality CDR1 " +
"-nFeature FR2 -minFeatureQuality FR2 -nFeature CDR2 -minFeatureQuality CDR2 " +
"-nFeature FR3 -minFeatureQuality FR3 -nFeature CDR3 -minFeatureQuality CDR3 " +
"-nFeature FR4 -minFeatureQuality FR4 " +
"-aaFeature FR1 -aaFeature CDR1 -aaFeature FR2 -aaFeature CDR2 -aaFeature FR3 -aaFeature CDR3 -aaFeature FR4 "));

Map<String, String> clones = new HashMap<>();
clones.put("min", "-count -vHit -dHit -jHit -cHit -nFeature CDR3");
clones.put("full", "-count -fraction -sequence -quality " +
"-vHitsWithScore -dHitsWithScore -jHitsWithScore -cHitsWithScore " +
"-vAlignments -dAlignments -jAlignments -cAlignments " +
"-nFeature FR1 -minFeatureQuality FR1 -nFeature CDR1 -minFeatureQuality CDR1 " +
"-nFeature FR2 -minFeatureQuality FR2 -nFeature CDR2 -minFeatureQuality CDR2 " +
"-nFeature FR3 -minFeatureQuality FR3 -nFeature CDR3 -minFeatureQuality CDR3 " +
"-nFeature FR4 -minFeatureQuality FR4 " +
"-aaFeature FR1 -aaFeature CDR1 -aaFeature FR2 -aaFeature CDR2 " +
"-aaFeature FR3 -aaFeature CDR3 -aaFeature FR4 ");
preset.put(Clone.class, clones);

Map<String, ArrayList<FieldExtractor>> alignments = new HashMap<>();
alignments.put("min", parsePresetString(VDJCAlignments.class,
"-vHit -dHit -jHit -cHit -nfeature CDR3"));
alignments.put("full", parsePresetString(VDJCAlignments.class,
"-sequence -quality " +
"-vHits -dHits -jHits -cHits " +
"-vAlignments -dAlignments -jAlignments -cAlignments " +
"-nFeature FR1 -minFeatureQuality FR1 -nFeature CDR1 -minFeatureQuality CDR1 " +
"-nFeature FR2 -minFeatureQuality FR2 -nFeature CDR2 -minFeatureQuality CDR2 " +
"-nFeature FR3 -minFeatureQuality FR3 -nFeature CDR3 -minFeatureQuality CDR3 " +
"-nFeature FR4 -minFeatureQuality FR4 " +
"-aaFeature FR1 -aaFeature CDR1 -aaFeature FR2 -aaFeature CDR2 -aaFeature FR3 -aaFeature CDR3 -aaFeature FR4 "));
Map<String, String> alignments = new HashMap<>();
alignments.put("min", "-vHit -dHit -jHit -cHit -nFeature CDR3");
alignments.put("full", "-sequence -quality " +
"-vHitsWithScore -dHitsWithScore -jHitsWithScore -cHitsWithScore " +
"-vAlignments -dAlignments -jAlignments -cAlignments " +
"-nFeature FR1 -minFeatureQuality FR1 -nFeature CDR1 -minFeatureQuality CDR1 " +
"-nFeature FR2 -minFeatureQuality FR2 -nFeature CDR2 -minFeatureQuality CDR2 " +
"-nFeature FR3 -minFeatureQuality FR3 -nFeature CDR3 -minFeatureQuality CDR3 " +
"-nFeature FR4 -minFeatureQuality FR4 " +
"-aaFeature FR1 -aaFeature CDR1 -aaFeature FR2 -aaFeature CDR2 " +
"-aaFeature FR3 -aaFeature CDR3 -aaFeature FR4 ")
;
preset.put(VDJCAlignments.class, alignments);
}

public static ArrayList<FieldExtractor> getPresetParameters(Class clazz, String string) {
return preset.get(clazz).get(string);
public static ArrayList<FieldExtractor> getPresetParameters(OutputMode mode, Class clazz, String string) {
return parsePresetString(mode, clazz, preset.get(clazz).get(string));
}

private static void trim(String[] args) {
Expand All @@ -237,12 +257,18 @@ private static void trim(String[] args) {
PRESET_FILE_SHORT = "-pf",
PRESET_FILE_LONG = "--presetFile",
FIELDS_SHORT = "-l",
FIELDS_LONG = "--listFields";
FIELDS_LONG = "--listFields",
PARSING_LONG = "--no-spaces",
PARSING_SHORT = "-s";

public static boolean isPresetParameter(String string) {
return string.equals(PRESET_SHORT) || string.equals(PRESET_LONG);
}

public static boolean isParsingFriendly(String string) {
return string.equals(PARSING_SHORT) || string.equals(PARSING_LONG);
}

public static boolean isPresetFileParameter(String string) {
return string.equals(PRESET_FILE_SHORT) || string.equals(PRESET_FILE_LONG);
}
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/com/milaboratory/mixcr/export/Field.java
Original file line number Diff line number Diff line change
Expand Up @@ -35,5 +35,5 @@ public interface Field<T> {

String getDescription();

FieldExtractor<T> create(String[] args);
FieldExtractor<T> create(OutputMode outputMode, String[] args);
}

0 comments on commit 134c3e9

Please sign in to comment.