Skip to content

Commit

Permalink
Created vsearchFileParser class to encapsulate code
Browse files Browse the repository at this point in the history
Issue #169
  • Loading branch information
mothur-westcott committed Oct 14, 2015
1 parent d502226 commit 9f653f5
Show file tree
Hide file tree
Showing 6 changed files with 199 additions and 97 deletions.
6 changes: 6 additions & 0 deletions Mothur.xcodeproj/project.pbxproj
Expand Up @@ -380,6 +380,7 @@
48705AC819BE32C50075E977 /* abstractrandomforest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48705AC319BE32C50075E977 /* abstractrandomforest.cpp */; };
487C5A871AB88B93002AF48A /* mimarksattributescommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 487C5A851AB88B93002AF48A /* mimarksattributescommand.cpp */; };
4893DE2918EEF28100C615DF /* (null) in Sources */ = {isa = PBXBuildFile; };
489B55721BCD7F0100FB7DC8 /* vsearchfileparser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 489B55701BCD7F0100FB7DC8 /* vsearchfileparser.cpp */; };
48A52F9A1BC428DF00548F6C /* abundance.cc in Sources */ = {isa = PBXBuildFile; fileRef = 48A52F4D1BC428DF00548F6C /* abundance.cc */; };
48A52F9B1BC428DF00548F6C /* abundance.cc in Sources */ = {isa = PBXBuildFile; fileRef = 48A52F4D1BC428DF00548F6C /* abundance.cc */; };
48A52F9C1BC428DF00548F6C /* align_simd.cc in Sources */ = {isa = PBXBuildFile; fileRef = 48A52F4F1BC428DF00548F6C /* align_simd.cc */; };
Expand Down Expand Up @@ -892,6 +893,8 @@
487C5A851AB88B93002AF48A /* mimarksattributescommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = mimarksattributescommand.cpp; path = /Users/sarahwestcott/Desktop/mothur/source/commands/mimarksattributescommand.cpp; sourceTree = "<absolute>"; };
487C5A861AB88B93002AF48A /* mimarksattributescommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = mimarksattributescommand.h; path = /Users/sarahwestcott/Desktop/mothur/source/commands/mimarksattributescommand.h; sourceTree = "<absolute>"; };
48844B261AA74AF9006EF2B8 /* compare.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = compare.h; path = source/datastructures/compare.h; sourceTree = "<group>"; };
489B55701BCD7F0100FB7DC8 /* vsearchfileparser.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = vsearchfileparser.cpp; path = source/vsearch_src/vsearchfileparser.cpp; sourceTree = "<group>"; };
489B55711BCD7F0100FB7DC8 /* vsearchfileparser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = vsearchfileparser.h; path = source/vsearch_src/vsearchfileparser.h; sourceTree = "<group>"; };
48A52F4D1BC428DF00548F6C /* abundance.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = abundance.cc; path = source/vsearch_src/abundance.cc; sourceTree = "<group>"; };
48A52F4E1BC428DF00548F6C /* abundance.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = abundance.h; path = source/vsearch_src/abundance.h; sourceTree = "<group>"; };
48A52F4F1BC428DF00548F6C /* align_simd.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = align_simd.cc; path = source/vsearch_src/align_simd.cc; sourceTree = "<group>"; };
Expand Down Expand Up @@ -1983,6 +1986,8 @@
48A52F961BC428DF00548F6C /* util.h */,
48A52F971BC428DF00548F6C /* vsearch.cc */,
48A52F981BC428DF00548F6C /* vsearch.h */,
489B55701BCD7F0100FB7DC8 /* vsearchfileparser.cpp */,
489B55711BCD7F0100FB7DC8 /* vsearchfileparser.h */,
48A52F991BC428DF00548F6C /* xstring.h */,
);
name = vsearch;
Expand Down Expand Up @@ -3437,6 +3442,7 @@
48A52FC61BC428DF00548F6C /* msa.cc in Sources */,
A7E9B93C12D37EC400DA6239 /* seqerrorcommand.cpp in Sources */,
48A52FC21BC428DF00548F6C /* md5.c in Sources */,
489B55721BCD7F0100FB7DC8 /* vsearchfileparser.cpp in Sources */,
A7E9B93D12D37EC400DA6239 /* seqsummarycommand.cpp in Sources */,
A7E9B93E12D37EC400DA6239 /* sequence.cpp in Sources */,
A7E9B93F12D37EC400DA6239 /* sequencedb.cpp in Sources */,
Expand Down
107 changes: 13 additions & 94 deletions source/commands/clustercommand.cpp
Expand Up @@ -15,6 +15,7 @@
#include "deconvolutecommand.h"
#include "sequence.hpp"
#include "vsearch.h"
#include "vsearchfileparser.h"

//**********************************************************************************************************************
vector<string> ClusterCommand::setParameters(){
Expand Down Expand Up @@ -371,22 +372,15 @@ int ClusterCommand::execute(){

int ClusterCommand::runVsearchCluster(){
try {

//Run unique.seqs on the data if a name or count file is not given
map<string, int> nameMap;
if ((namefile == "") && (countfile == "")) { countfile = getNamesFile(fastafile); }
else if (namefile != "") { nameMap = m->readNames(namefile); }

if (countfile != "") { CountTable countTable; countTable.readTable(countfile, false, false); nameMap = countTable.getNameMap(); }

string vsearchFastafile = ""; VsearchFileParser* vParse;
if ((namefile == "") && (countfile == "")) { vParse = new VsearchFileParser(fastafile); }
else if (namefile != "") { vParse = new VsearchFileParser(fastafile, namefile, "name"); }
else if (countfile != "") { vParse = new VsearchFileParser(fastafile, countfile, "count"); }
else { m->mothurOut("[ERROR]: Opps, should never get here. ClusterCommand::runVsearchCluster() \n"); m->control_pressed = true; }

if (m->control_pressed) { return 0; }

//Remove gap characters from each sequence if needed
//Append the number of sequences that each unique sequence represents to the end of the fasta file name
//Sorts by abundance
string vsearchFastafile = createVsearchFasta(fastafile, nameMap); nameMap.clear();

if (m->control_pressed) { return 0; }
vsearchFastafile = vParse->getVsearchFile(); delete vParse;

if (cutoff > 1.0) { m->mothurOut("You did not set a cutoff, using 0.20.\n"); cutoff = 0.20; }

Expand Down Expand Up @@ -418,7 +412,7 @@ int ClusterCommand::vsearchDriver(string inputFile, string ucClusteredFile, stri
//vsearch --maxaccepts 16 --usersort --id 0.97 --minseqlength 30 --wordlength 8 --uc $ROOT.clustered.uc --cluster_smallmem $ROOT.sorted.fna --maxrejects 64 --strand both --log $ROOT.clustered.log --sizeorder


int numArgs = 11;
int numArgs = 11; if (method == "dgc") { numArgs = 10; } //no sizeorder for dgc
char** vsearchParameters;
vsearchParameters = new char*[numArgs];

Expand Down Expand Up @@ -471,8 +465,10 @@ int ClusterCommand::vsearchDriver(string inputFile, string ucClusteredFile, stri
*vsearchParameters[parameterCount] = '\0'; strncat(vsearchParameters[parameterCount], tempIn.c_str(), tempIn.length());
parameterCount++;

//--sizeorder
vsearchParameters[parameterCount] = new char[12]; *vsearchParameters[parameterCount] = '\0'; strncat(vsearchParameters[parameterCount], "--sizeorder", 11); parameterCount++;
if (method == "agc") {
//--sizeorder
vsearchParameters[parameterCount] = new char[12]; *vsearchParameters[parameterCount] = '\0'; strncat(vsearchParameters[parameterCount], "--sizeorder", 11); parameterCount++;
}

errno = 0;
vsearch_main(numArgs, vsearchParameters);
Expand All @@ -488,48 +484,6 @@ int ClusterCommand::vsearchDriver(string inputFile, string ucClusteredFile, stri
exit(1);
}
}

//**********************************************************************************************************************

string ClusterCommand::createVsearchFasta(string inputFile, map<string, int>& nameMap){
try {
string vsearchFasta = m->getSimpleName(fastafile) + method + ".sorted.fasta";

vector<seqPriorityNode> seqs;
map<string, int>::iterator it;

ifstream in;
m->openInputFile(inputFile, in);

while (!in.eof()) {

if (m->control_pressed) { in.close(); return vsearchFasta; }

Sequence seq(in); m->gobble(in);

it = nameMap.find(seq.getName());
if (it == nameMap.end()) {
m->mothurOut("[ERROR]: " + seq.getName() + " is not in your name or countfile, quitting.\n"); m->control_pressed = true;
}else {
seqPriorityNode temp(it->second, seq.getUnaligned(), it->first);
seqs.push_back(temp);
nameMap.erase(it);
}

}
in.close();

m->printVsearchFile(seqs, vsearchFasta);

return vsearchFasta;
}
catch(exception& e) {
m->errorOut(e, "ClusterCommand", "createVsearchFasta");
exit(1);
}
}


//**********************************************************************************************************************

int ClusterCommand::runMothurCluster(){
Expand Down Expand Up @@ -727,41 +681,6 @@ void ClusterCommand::printData(string label, map<string, int>& counts){
}
//**********************************************************************************************************************

string ClusterCommand::getNamesFile(string& inputFile){
try {
string nameFile = "";

m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine();

//use unique.seqs to create new name and fastafile
string inputString = "fasta=" + inputFile + ", format=count";
m->mothurOut("/******************************************/"); m->mothurOutEndLine();
m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine();
m->mothurCalling = true;

Command* uniqueCommand = new DeconvoluteCommand(inputString);
uniqueCommand->execute();

map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();

delete uniqueCommand;
m->mothurCalling = false;
m->mothurOut("/******************************************/"); m->mothurOutEndLine();

countfile = filenames["count"][0];
fastafile = filenames["fasta"][0];
distfile = fastafile;

return nameFile;
}
catch(exception& e) {
m->errorOut(e, "ClusterCommand", "getNamesFile");
exit(1);
}
}

//**********************************************************************************************************************

int ClusterCommand::createRabund(CountTable*& ct, ListVector*& list, RAbundVector*& rabund){
try {
rabund->setLabel(list->getLabel());
Expand Down
2 changes: 0 additions & 2 deletions source/commands/clustercommand.h
Expand Up @@ -69,8 +69,6 @@ class ClusterCommand : public Command {
vector<string> outputNames;

int createRabund(CountTable*&, ListVector*&, RAbundVector*&);
string getNamesFile(string& inputFile);
string createVsearchFasta(string, map<string, int>&);
int vsearchDriver(string, string, string);
int runVsearchCluster();
int runMothurCluster();
Expand Down
1 change: 0 additions & 1 deletion source/mothur.h
Expand Up @@ -78,7 +78,6 @@
#include <direct.h> //get cwd
#include <windows.h>
#include <psapi.h>
#include <direct.h>
#include <tchar.h>

#endif
Expand Down
142 changes: 142 additions & 0 deletions source/vsearch_src/vsearchfileparser.cpp
@@ -0,0 +1,142 @@
//
// vsearchfileparser.cpp
// Mothur
//
// Created by Sarah Westcott on 10/13/15.
// Copyright (c) 2015 Schloss Lab. All rights reserved.
//

#include "vsearchfileparser.h"
#include "deconvolutecommand.h"
#include "sequence.hpp"

/***********************************************************************/
VsearchFileParser::VsearchFileParser(string f){
try {
m = MothurOut::getInstance();
fastafile = f;
namefile = "";
countfile = "";
}
catch(exception& e) {
m->errorOut(e, "VsearchFileParser", "VsearchFileParser");
exit(1);
}
}
/***********************************************************************/
VsearchFileParser::VsearchFileParser(string f, string nameOrCount, string format) {
try {
m = MothurOut::getInstance();
fastafile = f;
namefile = "";
countfile = "";

if (format == "name") { namefile = nameOrCount; }
else if (format == "count") { countfile = nameOrCount; }
else { m->mothurOut("[ERROR]: " + format + " is not a valid file format for the VsearchFileParser, quitting.\n"); m->control_pressed = true; }

}
catch(exception& e) {
m->errorOut(e, "VsearchFileParser", "VsearchFileParser");
exit(1);
}
}
/***********************************************************************/
string VsearchFileParser::getVsearchFile() {
try {
//Run unique.seqs on the data if a name or count file is not given
map<string, int> nameMap;
if ((namefile == "") && (countfile == "")) { countfile = getNamesFile(fastafile); }
else if (namefile != "") { nameMap = m->readNames(namefile); }

if (countfile != "") { CountTable countTable; countTable.readTable(countfile, false, false); nameMap = countTable.getNameMap(); }

if (m->control_pressed) { return 0; }

//Remove gap characters from each sequence if needed
//Append the number of sequences that each unique sequence represents to the end of the fasta file name
//Sorts by abundance
string vsearchFastafile = createVsearchFasta(fastafile, nameMap); nameMap.clear();

return vsearchFastafile;

}
catch(exception& e) {
m->errorOut(e, "VsearchFileParser", "getVsearchFile");
exit(1);
}
}
/**********************************************************************/

string VsearchFileParser::createVsearchFasta(string inputFile, map<string, int>& nameMap){
try {
string vsearchFasta = m->getSimpleName(fastafile) + ".sorted.fasta.temp";

vector<seqPriorityNode> seqs;
map<string, int>::iterator it;

ifstream in;
m->openInputFile(inputFile, in);

while (!in.eof()) {

if (m->control_pressed) { in.close(); return vsearchFasta; }

Sequence seq(in); m->gobble(in);

it = nameMap.find(seq.getName());
if (it == nameMap.end()) {
m->mothurOut("[ERROR]: " + seq.getName() + " is not in your name or countfile, quitting.\n"); m->control_pressed = true;
}else {
seqPriorityNode temp(it->second, seq.getUnaligned(), it->first);
seqs.push_back(temp);
nameMap.erase(it);
}

}
in.close();

m->printVsearchFile(seqs, vsearchFasta);

return vsearchFasta;
}
catch(exception& e) {
m->errorOut(e, "VsearchFileParser", "createVsearchFasta");
exit(1);
}
}
/*************************************************************************/

string VsearchFileParser::getNamesFile(string& inputFile){
try {
string nameFile = "";

m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine();

//use unique.seqs to create new name and fastafile
string inputString = "fasta=" + inputFile + ", format=count";
m->mothurOut("/******************************************/"); m->mothurOutEndLine();
m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine();
m->mothurCalling = true;

Command* uniqueCommand = new DeconvoluteCommand(inputString);
uniqueCommand->execute();

map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();

delete uniqueCommand;
m->mothurCalling = false;
m->mothurOut("/******************************************/"); m->mothurOutEndLine();

countfile = filenames["count"][0];
fastafile = filenames["fasta"][0];

return nameFile;
}
catch(exception& e) {
m->errorOut(e, "VsearchFileParser", "getNamesFile");
exit(1);
}
}
/***********************************************************************/

38 changes: 38 additions & 0 deletions source/vsearch_src/vsearchfileparser.h
@@ -0,0 +1,38 @@
//
// vsearchfileparser.h
// Mothur
//
// Created by Sarah Westcott on 10/13/15.
// Copyright (c) 2015 Schloss Lab. All rights reserved.
//

#ifndef __Mothur__vsearchfileparser__
#define __Mothur__vsearchfileparser__

#include "mothurout.h"


/**************************************************************************************************/

class VsearchFileParser {

public:
VsearchFileParser(string f);
VsearchFileParser(string f, string n, string format);
~VsearchFileParser(){}

string getVsearchFile();

private:
MothurOut* m;
string fastafile, namefile, countfile;
string getNamesFile(string& inputFile);
string createVsearchFasta(string, map<string, int>&);


};

/**************************************************************************************************/


#endif /* defined(__Mothur__vsearchfileparser__) */

0 comments on commit 9f653f5

Please sign in to comment.