Permalink
Browse files

Merge pull request #279 from mothur/precluster_mem

Precluster mem
  • Loading branch information...
2 parents 924c670 + 0a2af8a commit 86b5ed320fab01cf6f3b602dceb4dfef0a4a3e53 @mothur-westcott mothur-westcott committed on GitHub Oct 4, 2016
@@ -520,7 +520,8 @@ int ChimeraPerseusCommand::execute(){
ct->readTable(nameFile, true, false);
if (ct->hasGroupInfo()) {
- cparser = new SequenceCountParser(fastaFileNames[s], *ct);
+ vector<string> temp;
+ cparser = new SequenceCountParser(fastaFileNames[s], *ct, temp);
variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFile));
newCountFile = getOutputFileName("count", variables);
@@ -599,7 +600,8 @@ int ChimeraPerseusCommand::execute(){
}else {
if (groupFile != "") {
//Parse sequences by group
- parser = new SequenceParser(groupFile, fastaFileNames[s], nameFile);
+ vector<string> temp;
+ parser = new SequenceParser(groupFile, fastaFileNames[s], nameFile, temp);
vector<string> groups = parser->getNamesOfGroups();
if (m->control_pressed) { delete parser; for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
@@ -1042,7 +1042,8 @@ int ChimeraSlayerCommand::setUpForSelfReference(SequenceParser*& parser, map<str
fileGroup[fastaFileNames[s]] = "noGroup";
}else {
//Parse sequences by group
- parser = new SequenceParser(groupFile, fastaFileNames[s], nameFile);
+ vector<string> temp;
+ parser = new SequenceParser(groupFile, fastaFileNames[s], nameFile, temp);
vector<string> groups = parser->getNamesOfGroups();
for (int i = 0; i < groups.size(); i++) {
@@ -1088,7 +1089,8 @@ int ChimeraSlayerCommand::setUpForSelfReference(SequenceCountParser*& parser, ma
fileGroup[fastaFileNames[s]] = "noGroup";
}else {
//Parse sequences by group
- parser = new SequenceCountParser(nameFile, fastaFileNames[s]);
+ vector<string> temp;
+ parser = new SequenceCountParser(nameFile, fastaFileNames[s], temp);
vector<string> groups = parser->getNamesOfGroups();
for (int i = 0; i < groups.size(); i++) {
@@ -689,12 +689,13 @@ int ChimeraUchimeCommand::execute(){
//Parse sequences by group
vector<string> groups;
map<string, string> uniqueNames;
+ vector<string> temp;
if (hasCount) {
- cparser = new SequenceCountParser(nameFile, fastaFileNames[s]);
+ cparser = new SequenceCountParser(nameFile, fastaFileNames[s], temp);
groups = cparser->getNamesOfGroups();
uniqueNames = cparser->getAllSeqsMap();
}else{
- sparser = new SequenceParser(groupFile, fastaFileNames[s], nameFile);
+ sparser = new SequenceParser(groupFile, fastaFileNames[s], nameFile, temp);
groups = sparser->getNamesOfGroups();
uniqueNames = sparser->getAllSeqsMap();
}
@@ -642,12 +642,13 @@ int ChimeraVsearchCommand::execute(){
//Parse sequences by group
vector<string> groups;
map<string, string> uniqueNames;
+ vector<string> temp;
if (hasCount) {
- cparser = new SequenceCountParser(nameFile, fastaFileNames[s]);
+ cparser = new SequenceCountParser(nameFile, fastaFileNames[s], temp);
groups = cparser->getNamesOfGroups();
uniqueNames = cparser->getAllSeqsMap();
}else{
- sparser = new SequenceParser(groupFile, fastaFileNames[s], nameFile);
+ sparser = new SequenceParser(groupFile, fastaFileNames[s], nameFile, temp);
groups = sparser->getNamesOfGroups();
uniqueNames = sparser->getAllSeqsMap();
}
@@ -299,25 +299,11 @@ int PreClusterCommand::execute(){
ofstream outNames; m->openOutputFile(newNamesFile, outNames); outNames.close();
newMapFile = fileroot + "precluster.";
- //parse fasta and name file by group
- vector<string> groups;
- if (countfile != "") {
- cparser = new SequenceCountParser(countfile, fastafile);
- groups = cparser->getNamesOfGroups();
- }else {
- if (namefile != "") { parser = new SequenceParser(groupfile, fastafile, namefile); }
- else { parser = new SequenceParser(groupfile, fastafile); }
- groups = parser->getNamesOfGroups();
- }
-
- if(processors == 1) { driverGroups(newFastaFile, newNamesFile, newMapFile, 0, groups.size(), groups); }
- else { createProcessesGroups(newFastaFile, newNamesFile, newMapFile, groups); }
+ createProcessesGroups(newFastaFile, newNamesFile, newMapFile);
if (countfile != "") {
mergeGroupCounts(newCountFile, newNamesFile, newFastaFile);
- delete cparser;
- }else {
- delete parser;
+ }else {
//run unique.seqs for deconvolute results
string inputString = "fasta=" + newFastaFile;
if (namefile != "") { inputString += ", name=" + newNamesFile; }
@@ -400,14 +386,19 @@ int PreClusterCommand::execute(){
}
}
/**************************************************************************************************/
-int PreClusterCommand::createProcessesGroups(string newFName, string newNName, string newMFile, vector<string> groups) {
+int PreClusterCommand::createProcessesGroups(string newFName, string newNName, string newMFile) {
try {
vector<int> processIDS;
int process = 1;
int num = 0;
bool recalc = false;
+ //parse fasta and name file by group
+ vector<string> groups;
+ if (countfile != "") { CountTable ct; ct.testGroups(countfile, groups); }
+ else { GroupMap gp; gp.readMap(groupfile); groups = gp.getNamesOfGroups(); }
+
//sanity check
if (groups.size() < processors) { processors = groups.size(); }
@@ -603,15 +594,25 @@ int PreClusterCommand::createProcessesGroups(string newFName, string newNName, s
/**************************************************************************************************/
int PreClusterCommand::driverGroups(string newFFile, string newNFile, string newMFile, int start, int end, vector<string> groups){
try {
-
+ vector<string> subsetGroups;
+ for (int i = start; i < end; i++) { subsetGroups.push_back(groups[i]); }
+
+ //parse fasta and name file by group
+ if (countfile != "") {
+ cparser = new SequenceCountParser(countfile, fastafile, subsetGroups);
+ }else {
+ if (namefile != "") { parser = new SequenceParser(groupfile, fastafile, namefile, subsetGroups); }
+ else { parser = new SequenceParser(groupfile, fastafile, subsetGroups); }
+ }
+
int numSeqs = 0;
//precluster each group
for (int i = start; i < end; i++) {
start = time(NULL);
- if (m->control_pressed) { return 0; }
+ if (m->control_pressed) { if (countfile != "") { delete cparser; }else { delete parser; } return 0; }
m->mothurOutEndLine(); m->mothurOut("Processing group " + groups[i] + ":"); m->mothurOutEndLine();
@@ -640,10 +641,11 @@ int PreClusterCommand::driverGroups(string newFFile, string newNFile, string new
m->mothurOut("pre.cluster removed " + toString(count) + " sequences."); m->mothurOutEndLine(); m->mothurOutEndLine();
printData(newFFile, newNFile, groups[i]);
- m->mothurOut("It took " + toString(time(NULL) - start) + " secs to cluster " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
-
+ m->mothurOut("It took " + toString(time(NULL) - start) + " secs to cluster " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
}
+ if (countfile != "") { delete cparser; }else { delete parser; }
+
return numSeqs;
}
catch(exception& e) {
@@ -1013,15 +1015,33 @@ void PreClusterCommand::printData(string newfasta, string newname, string group)
}
if ((countfile != "") && (group == "")) { outNames << "Representative_Sequence\ttotal\n"; }
- for (int i = 0; i < alignSeqs.size(); i++) {
- if (alignSeqs[i].numIdentical != 0) {
- alignSeqs[i].seq.printSequence(outFasta);
- if (countfile != "") {
- if (group != "") { outNames << group << '\t' << alignSeqs[i].seq.getName() << '\t' << alignSeqs[i].names << endl; }
- else { outNames << alignSeqs[i].seq.getName() << '\t' << alignSeqs[i].numIdentical << endl; }
- }else { outNames << alignSeqs[i].seq.getName() << '\t' << alignSeqs[i].names << endl; }
- }
- }
+
+ if (countfile != "") {
+ if (group != "") {
+ for (int i = 0; i < alignSeqs.size(); i++) {
+ if (alignSeqs[i].numIdentical != 0) {
+ alignSeqs[i].seq.printSequence(outFasta);
+ outNames << group << '\t' << alignSeqs[i].seq.getName() << '\t' << alignSeqs[i].names << endl;
+ }
+ }
+ }
+ else {
+ for (int i = 0; i < alignSeqs.size(); i++) {
+ if (alignSeqs[i].numIdentical != 0) {
+ alignSeqs[i].seq.printSequence(outFasta);
+ outNames << alignSeqs[i].seq.getName() << '\t' << alignSeqs[i].numIdentical << endl;
+ }
+ }
+ }
+ }else {
+ for (int i = 0; i < alignSeqs.size(); i++) {
+ if (alignSeqs[i].numIdentical != 0) {
+ alignSeqs[i].seq.printSequence(outFasta);
+ outNames << alignSeqs[i].seq.getName() << '\t' << alignSeqs[i].names << endl;
+ }
+ }
+ }
+
outFasta.close();
outNames.close();
@@ -92,8 +92,8 @@ class PreClusterCommand : public Command {
void printData(string, string, string); //fasta filename, names file name
int process(string);
int loadSeqs(map<string, string>&, vector<Sequence>&, string);
- int driverGroups(string, string, string, int, int, vector<string> groups);
- int createProcessesGroups(string, string, string, vector<string>);
+ int driverGroups(string, string, string, int, int, vector<string>);
+ int createProcessesGroups(string, string, string);
int mergeGroupCounts(string, string, string);
int filterSeqs();
};
@@ -163,14 +163,17 @@ static DWORD WINAPI MyPreclusterThreadFunction(LPVOID lpParam){
alignment = new NeedlemanOverlap(pDataArray->gapOpen, pDataArray->match, pDataArray->misMatch, 1000);
}
- //parse fasta and name file by group
- SequenceParser* parser;
+ vector<string> subsetGroups;
+ for (int i = pDataArray->start; i < pDataArray->end; i++) { subsetGroups.push_back(pDataArray->groups[i]); }
+
+ //parse fasta and name file by group
SequenceCountParser* cparser;
+ SequenceParser* parser;
if (pDataArray->countfile != "") {
- cparser = new SequenceCountParser(pDataArray->countfile, pDataArray->fastafile);
+ cparser = new SequenceCountParser(pDataArray->countfile, pDataArray->fastafile, subsetGroups);
}else {
- if (pDataArray->namefile != "") { parser = new SequenceParser(pDataArray->groupfile, pDataArray->fastafile, pDataArray->namefile); }
- else { parser = new SequenceParser(pDataArray->groupfile, pDataArray->fastafile); }
+ if (pDataArray->namefile != "") { parser = new SequenceParser(pDataArray->groupfile, pDataArray->fastafile, pDataArray->namefile, subsetGroups); }
+ else { parser = new SequenceParser(pDataArray->groupfile, pDataArray->fastafile, subsetGroups); }
}
int numSeqs = 0;
@@ -186,7 +189,7 @@ static DWORD WINAPI MyPreclusterThreadFunction(LPVOID lpParam){
int start = time(NULL);
- if (pDataArray->m->control_pressed) { delete parser; delete alignment;return 0; }
+ if (pDataArray->m->control_pressed) { if (pDataArray->countfile != "") { delete cparser; } else { delete parser; } delete alignment;return 0; }
pDataArray->m->mothurOutEndLine(); pDataArray->m->mothurOut("Processing group " + pDataArray->groups[k] + ":"); pDataArray->m->mothurOutEndLine();
@@ -213,7 +213,8 @@ int ShhhSeqsCommand::execute() {
if (groupfile != "") {
//Parse sequences by group
- SequenceParser parser(groupfile, fastafile, namefile);
+ vector<string> temp;
+ SequenceParser parser(groupfile, fastafile, namefile, temp);
vector<string> groups = parser.getNamesOfGroups();
if (m->control_pressed) { return 0; }
@@ -270,8 +270,8 @@ int SplitGroupCommand::execute(){
int SplitGroupCommand::runNameGroup(){
try {
SequenceParser* parser;
- if (namefile == "") { parser = new SequenceParser(groupfile, fastafile); }
- else { parser = new SequenceParser(groupfile, fastafile, namefile); }
+ if (namefile == "") { parser = new SequenceParser(groupfile, fastafile, Groups); }
+ else { parser = new SequenceParser(groupfile, fastafile, namefile, Groups); }
if (m->control_pressed) { delete parser; return 0; }
Oops, something went wrong.

0 comments on commit 86b5ed3

Please sign in to comment.