Permalink
Browse files

Adds merge.count command

  • Loading branch information...
1 parent a7d353d commit b27d5eccf9cbe98edb49140355b488e54e32a723 @mothur-westcott mothur-westcott committed Aug 3, 2016
@@ -379,6 +379,8 @@
488841621CC515A000C5E972 /* (null) in Sources */ = {isa = PBXBuildFile; };
488841651CC6C34900C5E972 /* renamefilecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 488841631CC6C34900C5E972 /* renamefilecommand.cpp */; };
488841661CC6C35500C5E972 /* renamefilecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 488841631CC6C34900C5E972 /* renamefilecommand.cpp */; };
+ 48910D431D5243E500F60EDB /* mergecountcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48910D411D5243E500F60EDB /* mergecountcommand.cpp */; };
+ 48910D441D5243E500F60EDB /* mergecountcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48910D411D5243E500F60EDB /* mergecountcommand.cpp */; };
4893DE2918EEF28100C615DF /* (null) in Sources */ = {isa = PBXBuildFile; };
489B55721BCD7F0100FB7DC8 /* vsearchfileparser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 489B55701BCD7F0100FB7DC8 /* vsearchfileparser.cpp */; };
48A11C6E1CDA40F0003481D8 /* testrenamefilecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48A11C6C1CDA40F0003481D8 /* testrenamefilecommand.cpp */; };
@@ -821,6 +823,8 @@
48844B261AA74AF9006EF2B8 /* compare.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = compare.h; path = source/datastructures/compare.h; sourceTree = SOURCE_ROOT; };
488841631CC6C34900C5E972 /* renamefilecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = renamefilecommand.cpp; path = source/commands/renamefilecommand.cpp; sourceTree = SOURCE_ROOT; };
488841641CC6C34900C5E972 /* renamefilecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = renamefilecommand.h; path = source/commands/renamefilecommand.h; sourceTree = SOURCE_ROOT; };
+ 48910D411D5243E500F60EDB /* mergecountcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = mergecountcommand.cpp; path = source/commands/mergecountcommand.cpp; sourceTree = "<group>"; };
+ 48910D421D5243E500F60EDB /* mergecountcommand.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = mergecountcommand.hpp; path = source/commands/mergecountcommand.hpp; sourceTree = "<group>"; };
489B55701BCD7F0100FB7DC8 /* vsearchfileparser.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = vsearchfileparser.cpp; path = source/vsearchfileparser.cpp; sourceTree = "<group>"; };
489B55711BCD7F0100FB7DC8 /* vsearchfileparser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = vsearchfileparser.h; path = source/vsearchfileparser.h; sourceTree = "<group>"; };
48A11C6C1CDA40F0003481D8 /* testrenamefilecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = testrenamefilecommand.cpp; path = testcommands/testrenamefilecommand.cpp; sourceTree = "<group>"; };
@@ -1964,6 +1968,8 @@
A7E6F69D17427D06006775E2 /* makelookupcommand.cpp */,
A7E9B74A12D37EC400DA6239 /* matrixoutputcommand.h */,
A7E9B74912D37EC400DA6239 /* matrixoutputcommand.cpp */,
+ 48910D411D5243E500F60EDB /* mergecountcommand.cpp */,
+ 48910D421D5243E500F60EDB /* mergecountcommand.hpp */,
48705ABF19BE32C50075E977 /* mergesfffilecommand.cpp */,
48705AC019BE32C50075E977 /* mergesfffilecommand.h */,
A7E9B75412D37EC400DA6239 /* mergefilecommand.h */,
@@ -2666,6 +2672,7 @@
481FB6881AC1B8B80076CFF3 /* weightedlinkage.cpp in Sources */,
480E8DB21CAB1F5E00A0D137 /* vsearchfileparser.cpp in Sources */,
481FB61E1AC1B7AC0076CFF3 /* unifracweightedcommand.cpp in Sources */,
+ 48910D441D5243E500F60EDB /* mergecountcommand.cpp in Sources */,
481FB5951AC1B71B0076CFF3 /* chimerabellerophoncommand.cpp in Sources */,
481FB68D1AC1BA9E0076CFF3 /* classify.cpp in Sources */,
481FB65F1AC1B8450076CFF3 /* myseqdist.cpp in Sources */,
@@ -3011,6 +3018,7 @@
A7E9B8C512D37EC400DA6239 /* fileoutput.cpp in Sources */,
A7E9B8C612D37EC400DA6239 /* filterseqscommand.cpp in Sources */,
A7E9B8C812D37EC400DA6239 /* flowdata.cpp in Sources */,
+ 48910D431D5243E500F60EDB /* mergecountcommand.cpp in Sources */,
A7E9B8C912D37EC400DA6239 /* formatcolumn.cpp in Sources */,
A7E9B8CA12D37EC400DA6239 /* formatphylip.cpp in Sources */,
A7E9B8CB12D37EC400DA6239 /* fullmatrix.cpp in Sources */,
@@ -152,6 +152,7 @@
#include "biominfocommand.h"
#include "renamefilecommand.h"
#include "chimeravsearchcommand.h"
+#include "mergecountcommand.hpp"
//needed for testing project
//CommandFactory* CommandFactory::_uniqueInstance;
@@ -327,6 +328,7 @@ CommandFactory::CommandFactory(){
commands["biom.info"] = "biom.info";
commands["set.seed"] = "set.seed";
commands["rename.file"] = "rename.file";
+ commands["merge.count"] = "merge.count";
}
@@ -526,6 +528,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString){
else if(commandName == "corr.axes") { command = new CorrAxesCommand(optionString); }
else if(commandName == "remove.rare") { command = new RemoveRareCommand(optionString); }
else if(commandName == "merge.groups") { command = new MergeGroupsCommand(optionString); }
+ else if(commandName == "merge.count") { command = new MergeCountCommand(optionString); }
else if(commandName == "amova") { command = new AmovaCommand(optionString); }
else if(commandName == "homova") { command = new HomovaCommand(optionString); }
else if(commandName == "mantel") { command = new MantelCommand(optionString); }
@@ -699,6 +702,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString, str
else if(commandName == "corr.axes") { pipecommand = new CorrAxesCommand(optionString); }
else if(commandName == "remove.rare") { pipecommand = new RemoveRareCommand(optionString); }
else if(commandName == "merge.groups") { pipecommand = new MergeGroupsCommand(optionString); }
+ else if(commandName == "merge.count") { pipecommand = new MergeCountCommand(optionString); }
else if(commandName == "amova") { pipecommand = new AmovaCommand(optionString); }
else if(commandName == "homova") { pipecommand = new HomovaCommand(optionString); }
else if(commandName == "mantel") { pipecommand = new MantelCommand(optionString); }
@@ -858,6 +862,7 @@ Command* CommandFactory::getCommand(string commandName){
else if(commandName == "corr.axes") { shellcommand = new CorrAxesCommand(); }
else if(commandName == "remove.rare") { shellcommand = new RemoveRareCommand(); }
else if(commandName == "merge.groups") { shellcommand = new MergeGroupsCommand(); }
+ else if(commandName == "merge.count") { shellcommand = new MergeCountCommand(); }
else if(commandName == "amova") { shellcommand = new AmovaCommand(); }
else if(commandName == "homova") { shellcommand = new HomovaCommand(); }
else if(commandName == "mantel") { shellcommand = new MantelCommand(); }
@@ -153,6 +153,7 @@
#include "biominfocommand.h"
#include "renamefilecommand.h"
#include "chimeravsearchcommand.h"
+#include "mergecountcommand.hpp"
//**********************************************************************************************************************
@@ -271,6 +272,7 @@ int HelpCommand::execute(){
else if(commandName == "corr.axes") { command = new CorrAxesCommand(optionString); }
else if(commandName == "remove.rare") { command = new RemoveRareCommand(optionString); }
else if(commandName == "merge.groups") { command = new MergeGroupsCommand(optionString); }
+ else if(commandName == "merge.count") { command = new MergeCountCommand(optionString); }
else if(commandName == "amova") { command = new AmovaCommand(optionString); }
else if(commandName == "homova") { command = new HomovaCommand(optionString); }
else if(commandName == "mantel") { command = new MantelCommand(optionString); }
@@ -0,0 +1,226 @@
+//
+// mergecountcommand.cpp
+// Mothur
+//
+// Created by Sarah Westcott on 8/3/16.
+// Copyright © 2016 Schloss Lab. All rights reserved.
+//
+
+#include "mergecountcommand.hpp"
+#include "counttable.h"
+
+//**********************************************************************************************************************
+// Registers the parameters accepted by merge.count (count, output, seed,
+// inputdir, outputdir) in the inherited `parameters` list and returns the
+// names of everything currently stored in that list.
+// On an exception the error is reported through m->errorOut and the process exits.
+vector<string> MergeCountCommand::setParameters(){
+    try {
+        //input count files and the name of the merged output
+        CommandParameter pcount("count", "InputTypes", "", "", "", "", "","count",false,false,true);
+        parameters.push_back(pcount);
+        CommandParameter poutput("output", "String", "", "", "", "", "","",false,true,true);
+        parameters.push_back(poutput);
+
+        //settings shared with the other commands
+        CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false);
+        parameters.push_back(pseed);
+        CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false);
+        parameters.push_back(pinputdir);
+        CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false);
+        parameters.push_back(poutputdir);
+
+        //collect the names in registration order
+        vector<string> paramNames;
+        int idx = 0;
+        while (idx < parameters.size()) {
+            paramNames.push_back(parameters[idx].name);
+            idx++;
+        }
+        return paramNames;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "MergeCountCommand", "setParameters");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+// Builds the help text shown for merge.count: what the command does, which
+// parameters it takes, and an example invocation.
+// Each sentence ends with a newline so the help prints one statement per line
+// instead of a single run-on line.
+// On an exception the error is reported through m->errorOut and the process exits.
+string MergeCountCommand::getHelpString(){
+    try {
+        string helpString = "";
+        helpString += "The merge.count command takes a list of count files separated by dashes and merges them into one file.\n";
+        helpString += "The merge.count command parameters are count and output.\n";
+        helpString += "Example merge.count(count=final.count_table-new.count_table, output=complete.count_table).\n";
+        helpString += "Note: No spaces between parameter labels (i.e. output), '=' and parameters (i.e.yourOutputFileName).\n";
+        return helpString;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "MergeCountCommand", "getHelpString");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+// Default constructor: builds an instance that is flagged as aborted/help-only
+// (abort and calledHelp are both set), registers the command's parameters and
+// creates an empty "count" entry in outputTypes.
+// On an exception the error is reported through m->errorOut and the process exits.
+MergeCountCommand::MergeCountCommand(){
+    try {
+        abort = true;
+        calledHelp = true;
+
+        setParameters();
+
+        //start with an empty list of "count" outputs
+        outputTypes["count"] = vector<string>();
+    }
+    catch(exception& e) {
+        m->errorOut(e, "MergeCountCommand", "MergeCountCommand");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+
+// Constructs the command from a user supplied option string.
+//
+// "help" and "citation" print the corresponding text and mark the command as
+// aborted with calledHelp set. Any other string is parsed into parameters:
+//   - every parameter name is checked against the list from setParameters()
+//   - count    : dash separated list of count files, split into fileNames
+//   - inputdir : prepended to every file name that has no path of its own
+//   - output   : name of the merged file; when outputdir is given the file is
+//                placed there using only its simple name
+// Any validation failure sets abort so that execute() does no work.
+// On an exception the error is reported through m->errorOut and the process exits.
+MergeCountCommand::MergeCountCommand(string option) {
+    try {
+        abort = false; calledHelp = false;
+
+        if(option == "help") {
+            help();
+            abort = true; calledHelp = true;
+        }else if(option == "citation") { citation(); abort = true; calledHelp = true;}
+        else {
+            vector<string> myArray = setParameters();
+
+            OptionParser parser(option);
+            //note: this local map intentionally hides the inherited parameter list inside this scope
+            map<string,string> parameters = parser.getParameters();
+
+            ValidParameters validParameter;
+
+            //check to make sure all parameters are valid for command
+            for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
+                if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
+            }
+
+            //initialize outputTypes
+            vector<string> tempOutNames;
+            outputTypes["count"] = tempOutNames;
+
+            //if the user changes the input directory command factory will send this info to us in the output parameter
+            //stored in the class member rather than a shadowing local
+            inputDir = validParameter.validFile(parameters, "inputdir", false);
+            if (inputDir == "not found"){ inputDir = ""; }
+
+            string fileList = validParameter.validFile(parameters, "count", false);
+            if(fileList == "not found") { m->mothurOut("[ERROR]: you must enter two or more count file names"); m->mothurOutEndLine(); abort=true; }
+            else{ m->splitAtDash(fileList, fileNames); }
+
+            //if the user changes the output directory command factory will send this info to us in the output parameter
+            //stored in the class member rather than a shadowing local
+            outputDir = validParameter.validFile(parameters, "outputdir", false);
+            if (outputDir == "not found") { outputDir = ""; }
+
+            numInputFiles = fileNames.size();
+            if(numInputFiles == 0){
+                m->mothurOut("you must enter two or more file names and you entered " + toString(fileNames.size()) + " file names"); m->mothurOutEndLine();
+                abort=true;
+            }
+            else{
+                for(int i=0;i<numInputFiles;i++){
+                    if (inputDir != "") {
+                        string path = m->hasPath(fileNames[i]);
+                        //if the user has not given a path then, add inputdir. else leave path alone.
+                        if (path == "") { fileNames[i] = inputDir + fileNames[i]; }
+                    }
+
+                    //run each name through the file validator; a name it rejects aborts the command
+                    map<string, string> file; file["file"] = fileNames[i];
+                    fileNames[i] = validParameter.validFile(file, "file", true);
+                    if(fileNames[i] == "not found"){ abort = true; }
+                }
+            }
+
+            outputFileName = validParameter.validFile(parameters, "output", false);
+            if (outputFileName == "not found") { m->mothurOut("you must enter an output file name"); m->mothurOutEndLine(); abort=true; }
+            else if (outputDir != "") { outputFileName = outputDir + m->getSimpleName(outputFileName); }
+        }
+
+    }
+    catch(exception& e) {
+        m->errorOut(e, "MergeCountCommand", "MergeCountCommand");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+
+// Merges the validated input count files into outputFileName.
+//
+// Steps:
+//   1. Remove any existing output file.
+//   2. Inspect every input with CountTable::testGroups to learn whether it
+//      carries group information and to collect the union of group names.
+//      Mixing tables with and without groups is reported as an error.
+//   3. Tables without groups: the first file is appended whole (keeping its
+//      header) and each remaining file is appended without its header.
+//      Tables with groups: every sequence is re-added to a combined table with
+//      its abundances moved to the column of the matching group in the union.
+//   4. The merged file is registered as the current count file and as a
+//      "count" output of this command.
+//
+// Returns 2 when the constructor flagged invalid options, otherwise 0
+// (including help requests, interruption through m->control_pressed, and the
+// mixed-table error). On an exception the error is reported through
+// m->errorOut and the process exits.
+int MergeCountCommand::execute(){
+    try {
+        if (abort == true) { if (calledHelp) { return 0; } return 2; }
+
+        m->mothurRemove(outputFileName);
+
+        //read headers from each file to confirm all contain groupinfo or all do not
+        //Also collect all group names
+        bool allContainGroups = true; bool allNoGroups = true;
+        set<string> allGroups;
+        for(int i = 0; i < numInputFiles; i++) {
+
+            if (m->control_pressed) { return 0; }
+
+            vector<string> thisTablesGroups;
+            CountTable table;
+            bool hasGroups = table.testGroups(fileNames[i], thisTablesGroups);
+
+            if (hasGroups) {
+                allNoGroups = false;
+                for (int j = 0; j < thisTablesGroups.size(); j++) { allGroups.insert(thisTablesGroups[j]); }
+            }else { allContainGroups = false; }
+        }
+        int numGroups = allGroups.size();
+
+        //check to make sure all files are one type - quit if not
+        if (!allContainGroups && !allNoGroups) { m->mothurOut("[ERROR]: your have countfiles that contains group information and count files that do not. These cannot be combined without loss of information, please correct.\n"); m->control_pressed = true; return 0; }
+
+        if (m->control_pressed) { return 0; }
+
+        //Create Blank Table - (set<string>&, map<string, string>&, set<string>&); //seqNames, seqName->group, groupNames
+        set<string> seqNames; map<string, string> seqGroup; set<string> g;
+        CountTable completeTable;
+        completeTable.createTable(seqNames, seqGroup, g);
+
+        //append first one to get headers
+        map<string, int> groupIndex;
+        if (allNoGroups) { m->appendBinaryFiles(fileNames[0], outputFileName); }
+        else { //create groupMap to save time setting abundance vector
+            int count = 0;
+            for (set<string>::iterator it = allGroups.begin(); it != allGroups.end(); it++) {
+                completeTable.addGroup(*it);
+                groupIndex[*it] = count; count++;
+            }
+        }
+
+        //for each file
+        for(int i = 0; i < numInputFiles; i++) {
+
+            if (m->control_pressed) { break; }
+
+            if (allContainGroups) {
+
+                CountTable table; table.readTable(fileNames[i], true, false);
+                vector<string> groups = table.getNamesOfGroups();
+
+                vector<string> seqs = table.getNamesOfSeqs();
+                for (int j = 0; j < seqs.size(); j++) {
+                    if (m->control_pressed) { break; }
+                    vector<int> abunds = table.getGroupCounts(seqs[j]);
+                    vector<int> newAbunds; newAbunds.resize(numGroups, 0);
+                    for (int k = 0; k < abunds.size(); k++) {
+                        if (abunds[k] != 0) { //we need to set abundance in vector with all groups
+                            //groups and abunds are in matching order. we know all groups are in groupIndex from above.
+                            int newIndex = groupIndex[groups[k]];
+                            newAbunds[newIndex] = abunds[k];
+                        }
+                    }
+                    completeTable.push_back(seqs[j], newAbunds);
+                }
+            }
+            //No group info so simple append. The first file was already copied in full
+            //(header and rows) above, so appending it again here would duplicate its rows.
+            //NOTE(review): relies on appendBinaryFiles copying the whole file - confirm.
+            else if (i != 0) { m->appendFilesWithoutHeaders(fileNames[i], outputFileName); }
+        }
+
+        if (m->control_pressed) { m->mothurRemove(outputFileName); return 0; }
+
+        //print new table
+        if (allContainGroups) { completeTable.printTable(outputFileName); }
+
+        if (m->control_pressed) { m->mothurRemove(outputFileName); return 0; }
+
+        //update current count file
+        m->setCountTableFile(outputFileName);
+
+        m->mothurOutEndLine();
+        m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+        //record the result under the "count" type, the same key the constructors initialize
+        m->mothurOut(outputFileName); m->mothurOutEndLine(); outputNames.push_back(outputFileName); outputTypes["count"].push_back(outputFileName);
+        m->mothurOutEndLine();
+
+        return 0;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "MergeCountCommand", "execute");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
@@ -0,0 +1,48 @@
+//
+// mergecountcommand.hpp
+// Mothur
+//
+// Created by Sarah Westcott on 8/3/16.
+// Copyright © 2016 Schloss Lab. All rights reserved.
+//
+
+#ifndef mergecountcommand_hpp
+#define mergecountcommand_hpp
+
+#include "command.hpp"
+
+// Command class behind mothur's "merge.count": reads count files and combines
+// them into a single count file. Option parsing happens in the string
+// constructor; the merge itself is performed by execute().
+class MergeCountCommand : public Command {
+
+#ifdef UNIT_TEST
+    //friend class TestMergeCountCommand;
+#endif
+
+public:
+    //construction / destruction
+    MergeCountCommand(string);      //builds the command from a user option string
+    MergeCountCommand();            //builds a help-only instance (no options parsed)
+    ~MergeCountCommand() {}
+
+    //command metadata
+    vector<string> setParameters();
+    string getCommandName()         { return "merge.count"; }
+    string getCommandCategory()     { return "General"; }
+
+    string getHelpString();
+    string getOutputPattern(string) { return ""; }   //no output naming pattern is defined
+    string getCitation()            { return "http://www.mothur.org/wiki/Merge.count"; }
+    string getDescription()         { return "reads count files and combines them into a single count file"; }
+
+    //actions
+    int execute();
+    void help()                     { m->mothurOut(getHelpString()); }
+
+private:
+
+    //true when execute() must not do any work (help request or invalid options)
+    bool abort;
+
+    //directory and file name settings; outputFileName is the merged file written by execute()
+    //NOTE(review): countfile and output are not referenced in mergecountcommand.cpp - confirm they are needed
+    string outputDir,
+           inputDir,
+           countfile,
+           output,
+           outputFileName;
+
+    //files produced by the command, and the input count files to merge
+    vector<string> outputNames,
+                   fileNames;
+
+    //number of entries in fileNames
+    int numInputFiles;
+
+};
+
+
+#endif /* mergecountcommand_hpp */
Oops, something went wrong.

0 comments on commit b27d5ec

Please sign in to comment.