Permalink
Browse files

Adds support for spaces in taxonomy names to summary.tax

  • Loading branch information...
1 parent bfc09fe commit 5a9d9080cc1b9c6b6bd8853f0be5ea115049a84c @mothur-westcott mothur-westcott committed Aug 16, 2016
@@ -101,7 +101,6 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me
m->mothurOut("DONE."); m->mothurOutEndLine();
m->mothurOut("It took " + toString(time(NULL) - start) + " seconds generate search database. "); m->mothurOutEndLine();
-
readTaxonomy(taxFile);
//sanity check
@@ -292,10 +292,7 @@ int PhyloSummary::addSeqToTree(string seqName, string seqTaxonomy){
if (level > maxLevel) { maxLevel = level; }
- if ((seqTaxonomy == "") && (level < maxLevel)) { //if you think you are done and you are not.
- for (int k = level; k < maxLevel; k++) { seqTaxonomy += "unclassified;"; }
- }
- }
+ }
return 0;
}
catch(exception& e) {
@@ -368,9 +365,6 @@ int PhyloSummary::addSeqToTree(string seqTaxonomy, map<string, bool> containsGro
if (level > maxLevel) { maxLevel = level; }
- if ((seqTaxonomy == "") && (level < maxLevel)) { //if you think you are done and you are not.
- for (int k = level; k < maxLevel; k++) { seqTaxonomy += "unclassified;"; }
- }
}
return 0;
}
@@ -755,13 +749,15 @@ void PhyloSummary::readTreeStruct(ifstream& in){
//read the tree file
for (int i = 0; i < tree.size(); i++) {
- in >> tree[i].level >> tree[i].name >> num; //num contains the number of children tree[i] has
-
+ in >> tree[i].level >> num; m->gobble(in); //num contains the number of children tree[i] has
+ tree[i].name = m->getline(in); m->gobble(in);
+
//set children
string childName;
int childIndex;
for (int j = 0; j < num; j++) {
- in >> childName >> childIndex;
+ in >> childIndex; m->gobble(in);
+ childName = m->getline(in); m->gobble(in);
tree[i].children[childName] = childIndex;
}
@@ -46,7 +46,8 @@ PhyloTree::PhyloTree(ifstream& in, string filename){
in >> maxLevel; m->gobble(in);
for (int i = 0; i < tree.size(); i++) {
- in >> tree[i].name >> tree[i].level >> tree[i].parent; m->gobble(in);
+ tree[i].name = m->getline(in); m->gobble(in);
+ in >> tree[i].level >> tree[i].parent; m->gobble(in);
}
//read genus nodes
@@ -454,11 +455,12 @@ void PhyloTree::print(ofstream& out, vector<TaxNode>& copy){
for (int i = 0; i < copy.size(); i++) {
- out << copy[i].level << '\t'<< copy[i].name << '\t' << copy[i].children.size();
+ out << copy[i].level << '\t' << copy[i].children.size() << endl;
+ out << copy[i].name << endl;
map<string,int>::iterator it;
for(it=copy[i].children.begin();it!=copy[i].children.end();it++){
- out << '\t' << it->first << '\t' << it->second;
+ out << it->second << '\t' << it->first << endl;
}
out << endl;
}
@@ -485,7 +487,8 @@ void PhyloTree::printTreeNodes(string treefilename) {
outTree << maxLevel << endl;
for (int i = 0; i < tree.size(); i++) {
- outTree << tree[i].name << '\t' << tree[i].level << '\t' << tree[i].parent << endl;
+ outTree << tree[i].name << endl;
+ outTree << tree[i].level << '\t' << tree[i].parent << endl;
}
//print genus nodes
@@ -44,7 +44,8 @@ TaxEqualizer::TaxEqualizer(string tfile, int c, string o) : cutoff(c), outputDir
if (m->control_pressed) { break; }
- in >> name >> tax; m->gobble(in);
+ in >> name; m->gobble(in);
+ tax = m->getline(in); m->gobble(in);
if (containsConfidence) { m->removeConfidences(tax); }
@@ -78,7 +79,8 @@ int TaxEqualizer::getHighestLevel(ifstream& in) {
string name, tax;
while (in) {
- in >> name >> tax; m->gobble(in);
+ in >> name; m->gobble(in);
+ tax = m->getline(in); m->gobble(in);
//count levels in this taxonomy
int thisLevel = 0;
@@ -721,7 +721,8 @@ int ClassifySeqsCommand::execute(){
while (!inTax.eof()) {
if (m->control_pressed) { outputTypes.clear(); if (ct != NULL) { delete ct; } if (groupMap != NULL) { delete groupMap; } delete taxaSum; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } delete classify; return 0; }
- inTax >> name >> taxon; m->gobble(inTax);
+ inTax >> name; m->gobble(inTax);
+ taxon = m->getline(inTax); m->gobble(inTax);
string newTax = m->addUnclassifieds(taxon, maxLevel, probs);
@@ -220,7 +220,9 @@ SummaryTaxCommand::SummaryTaxCommand(string option) {
int SummaryTaxCommand::execute(){
try{
-
+
+ int maxLevel = findMaxLevel(taxfile);
+
if (abort == true) { if (calledHelp) { return 0; } return 2; }
int start = time(NULL);
@@ -241,63 +243,45 @@ int SummaryTaxCommand::execute(){
if (m->control_pressed) { if (groupMap != NULL) { delete groupMap; } if (ct != NULL) { delete ct; } delete taxaSum; return 0; }
int numSeqs = 0;
- if ((threshold == 0) && ((namefile == "") || (countfile != ""))) { numSeqs = taxaSum->summarize(taxfile); }
- else if (threshold != 0) {
- ifstream in;
- m->openInputFile(taxfile, in);
+ map<string, vector<string> > nameMap;
+ map<string, vector<string> >::iterator itNames;
+ if (namefile != "") { m->readNames(namefile, nameMap); }
+
+ ifstream in;
+ m->openInputFile(taxfile, in);
+
+ string name, taxon;
+ while(!in.eof()){
- string name, taxon;
- while(!in.eof()){
-
- if (m->control_pressed) { break; }
-
- in >> name >> taxon; m->gobble(in);
-
- if (threshold != 0) { taxon = processTaxMap(taxon); }
-
- //cout << taxon << endl;
+ if (m->control_pressed) { break; }
+
+ in >> name; m->gobble(in);
+ taxon = m->getline(in); m->gobble(in);
+
+ string newTax = m->addUnclassifieds(taxon, maxLevel, true);
+
+ if (threshold != 0) { newTax = processTaxMap(newTax); }
+
+ //add sequence to summary, countfile info included from Phylosummary constructor
+ if (namefile != "") {
+ itNames = nameMap.find(name);
- //add sequence to summary, countfile info included from Phylosummary constructor
- taxaSum->addSeqToTree(name, taxon);
+ if (itNames == nameMap.end()) {
+ m->mothurOut(name + " is not in your name file please correct."); m->mothurOutEndLine(); exit(1);
+ }else{
+ for (int i = 0; i < itNames->second.size(); i++) {
+ taxaSum->addSeqToTree(itNames->second[i], newTax); //add it as many times as there are identical seqs
+ }
+ itNames->second.clear();
+ nameMap.erase(itNames->first);
+ }
+ }else {
+ taxaSum->addSeqToTree(name, newTax);
}
- in.close();
+
}
- else if (namefile != "") {
- map<string, vector<string> > nameMap;
- map<string, vector<string> >::iterator itNames;
- m->readNames(namefile, nameMap);
-
- if (m->control_pressed) { if (groupMap != NULL) { delete groupMap; } if (ct != NULL) { delete ct; } delete taxaSum; return 0; }
-
- ifstream in;
- m->openInputFile(taxfile, in);
-
- //read in users taxonomy file and add sequences to tree
- string name, taxon;
-
- while(!in.eof()){
-
- if (m->control_pressed) { break; }
-
- in >> name >> taxon; m->gobble(in);
-
- if (threshold != 0) { taxon = processTaxMap(taxon); }
-
- itNames = nameMap.find(name);
-
- if (itNames == nameMap.end()) {
- m->mothurOut("[ERROR]: " + name + " is not in your name file please correct."); m->mothurOutEndLine(); exit(1);
- }else{
- for (int i = 0; i < itNames->second.size(); i++) {
- numSeqs++;
- taxaSum->addSeqToTree(itNames->second[i], taxon); //add it as many times as there are identical seqs
- }
- itNames->second.clear();
- nameMap.erase(itNames->first);
- }
- }
- in.close();
- }else if (threshold == 0) { numSeqs = taxaSum->summarize(taxfile); }
+ in.close();
+
if (m->control_pressed) { if (groupMap != NULL) { delete groupMap; } if (ct != NULL) { delete ct; } delete taxaSum; return 0; }
@@ -382,10 +366,10 @@ string SummaryTaxCommand::processTaxMap(string tax) {
if (spot != 0) {
newTax = "";
for (int i = 0; i < taxons.size(); i++) { newTax += taxons[i] + ";"; }
- for (int i = spot; i < taxLength; i++) {
- if(tax[i] == ';'){ newTax += "unclassified;"; }
- m->removeConfidences(newTax);
- }
+ //for (int i = spot; i < taxLength; i++) {
+ //if(tax[i] == ';'){ newTax += "unclassified;"; }
+ //m->removeConfidences(newTax);
+ //}
}else { m->removeConfidences(tax); newTax = tax; } //leave tax alone
return newTax;
@@ -395,6 +379,21 @@ string SummaryTaxCommand::processTaxMap(string tax) {
exit(1);
}
}
+/**************************************************************************************************/
+int SummaryTaxCommand::findMaxLevel(string file) { // Summarizes `file` with a throwaway PhyloSummary and returns the value of its getMaxLevel().
+ try{
+ GroupMap* groupMap = NULL; // a null GroupMap pointer is handed to the PhyloSummary below
+ PhyloSummary taxaSum(groupMap, false, -1); // NOTE(review): the meaning of `false` and `-1` is not visible here -- confirm against the PhyloSummary constructor
+
+ taxaSum.summarize(file); // presumably reads the whole taxonomy file so the level can be queried afterwards -- verify in PhyloSummary::summarize
+
+ return taxaSum.getMaxLevel();
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SummaryTaxCommand", "findMaxLevel"); // report the caught exception, tagged with this class and method name
+ exit(1); // a caught exception terminates the process rather than returning an error code
+ }
+}
/**************************************************************************************/
@@ -41,6 +41,7 @@ class SummaryTaxCommand : public Command {
map<string, int> nameMap;
string processTaxMap(string);
+ int findMaxLevel(string);
};
/**************************************************************************************************/
View
@@ -2480,78 +2480,39 @@ vector<string> MothurOut::splitWhiteSpaceWithQuotes(string input){
}
}
//**********************************************************************************************************************
-int MothurOut::readTax(string namefile, map<string, string>& taxMap, bool removeConfidence) {
+int MothurOut::readTax(string taxfile, map<string, string>& taxMap, bool removeConfidence) {
try {
//open input file
ifstream in;
- openInputFile(namefile, in);
+ openInputFile(taxfile, in);
- string rest = "";
- char buffer[4096];
- bool pairDone = false;
- bool columnOne = true;
- string firstCol, secondCol;
bool error = false;
+ string name, taxonomy;
while (!in.eof()) {
if (control_pressed) { break; }
- in.read(buffer, 4096);
- vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
+ in >> name; gobble(in);
+ taxonomy = getline(in); gobble(in);
- for (int i = 0; i < pieces.size(); i++) {
- if (columnOne) { firstCol = pieces[i]; columnOne=false; }
- else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
-
- if (pairDone) {
- checkName(firstCol);
- //are there confidence scores, if so remove them
- if (removeConfidence) { if (secondCol.find_first_of('(') != -1) { removeConfidences(secondCol); } }
- map<string, string>::iterator itTax = taxMap.find(firstCol);
-
- if(itTax == taxMap.end()) {
- bool ignore = false;
- if (secondCol != "") { if (secondCol[secondCol.length()-1] != ';') { mothurOut("[ERROR]: " + firstCol + " is missing the final ';', ignoring.\n"); ignore=true; }
- }
- if (!ignore) { taxMap[firstCol] = secondCol; }
- if (debug) { mothurOut("[DEBUG]: name = '" + firstCol + "' tax = '" + secondCol + "'\n"); }
- }else {
- mothurOut("[ERROR]: " + firstCol + " is already in your taxonomy file, names must be unique.\n"); error = true;
- }
- pairDone = false;
+ checkName(name);
+ //are there confidence scores, if so remove them
+ if (removeConfidence) { if (taxonomy.find_first_of('(') != -1) { removeConfidences(taxonomy); } }
+ map<string, string>::iterator itTax = taxMap.find(name);
+
+ if(itTax == taxMap.end()) {
+ bool ignore = false;
+ if (taxonomy != "") { if (taxonomy[taxonomy.length()-1] != ';') { mothurOut("[ERROR]: " + name + " is missing the final ';', ignoring.\n"); ignore=true; }
}
+ if (!ignore) { taxMap[name] = taxonomy; }
+ if (debug) { mothurOut("[DEBUG]: name = '" + name + "' tax = '" + taxonomy + "'\n"); }
+ }else {
+ mothurOut("[ERROR]: " + name + " is already in your taxonomy file, names must be unique./n"); error = true;
}
- }
- in.close();
-
- if (rest != "") {
- vector<string> pieces = splitWhiteSpace(rest);
-
- for (int i = 0; i < pieces.size(); i++) {
- if (columnOne) { firstCol = pieces[i]; columnOne=false; }
- else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
-
- if (pairDone) {
- checkName(firstCol);
- //are there confidence scores, if so remove them
- if (removeConfidence) { if (secondCol.find_first_of('(') != -1) { removeConfidences(secondCol); } }
- map<string, string>::iterator itTax = taxMap.find(firstCol);
-
- if(itTax == taxMap.end()) {
- bool ignore = false;
- if (secondCol != "") { if (secondCol[secondCol.length()-1] != ';') { mothurOut("[ERROR]: " + firstCol + " is missing the final ';', ignoring.\n"); ignore=true; }
- }
- if (!ignore) { taxMap[firstCol] = secondCol; }
- if (debug) { mothurOut("[DEBUG]: name = '" + firstCol + "' tax = '" + secondCol + "'\n"); }
- }else {
- mothurOut("[ERROR]: " + firstCol + " is already in your taxonomy file, names must be unique./n"); error = true;
- }
- pairDone = false;
- }
- }
}
-
+ in.close();
+
if (error) { control_pressed = true; }
if (debug) { mothurOut("[DEBUG]: numSeqs saved = '" + toString(taxMap.size()) + "'\n"); }
return taxMap.size();

0 comments on commit 5a9d908

Please sign in to comment.