Skip to content

Commit

Permalink
Sets default output for unique.seqs to count
Browse files Browse the repository at this point in the history
  • Loading branch information
mothur-westcott committed Sep 22, 2021
1 parent 07b5cde commit d1d3574
Show file tree
Hide file tree
Showing 3 changed files with 147 additions and 52 deletions.
28 changes: 11 additions & 17 deletions source/commands/deconvolutecommand.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ vector<string> DeconvoluteCommand::setParameters(){
CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta-name",false,true,true); parameters.push_back(pfasta);
CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none","name",false,false,true); parameters.push_back(pname);
CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none","count",false,false,true); parameters.push_back(pcount);
CommandParameter pformat("format", "Multiple", "count-name", "name", "", "", "","",false,false, true); parameters.push_back(pformat);
CommandParameter poutput("output", "Multiple", "count-name", "name", "", "", "","",false,false, true); parameters.push_back(poutput);
CommandParameter pformat("format", "Multiple", "count-name", "count", "", "", "","",false,false, true); parameters.push_back(pformat);
CommandParameter poutput("output", "Multiple", "count-name", "count", "", "", "","",false,false, true); parameters.push_back(poutput);
CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed);
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
Expand Down Expand Up @@ -46,7 +46,7 @@ string DeconvoluteCommand::getHelpString(){
helpString += "The unique.seqs command parameters are fasta, name, count and format. fasta is required, unless there is a valid current fasta file.\n";
helpString += "The name parameter is used to provide an existing name file associated with the fasta file. \n";
helpString += "The count parameter is used to provide an existing count file associated with the fasta file. \n";
helpString += "The format parameter is used to indicate what type of file you want outputted. Choices are name and count, default=name unless count file used then default=count.\n";
helpString += "The format parameter is used to indicate what type of file you want outputted. Choices are name and count, default=count unless name file used then default=name.\n";
helpString += "The unique.seqs command should be in the following format: \n";
helpString += "unique.seqs(fasta=yourFastaFile) \n";
return helpString;
Expand Down Expand Up @@ -94,10 +94,7 @@ DeconvoluteCommand::DeconvoluteCommand(string option) : Command() {
else { m->mothurOut("You have no current fastafile and the fasta parameter is required.\n"); abort = true; }
}else { current->setFastaFile(fastafile); }


if (outputdir == ""){
outputdir += util.hasPath(fastafile);
}
if (outputdir == ""){ outputdir += util.hasPath(fastafile); }

namefile = validParameter.validFile(parameters, "name");
if (namefile == "not open") { namefile = ""; abort = true; }
Expand All @@ -116,14 +113,14 @@ DeconvoluteCommand::DeconvoluteCommand(string option) : Command() {
if(format == "not found"){
format = validParameter.valid(parameters, "output");
if(format == "not found"){
if (countfile != "") { format = "count"; }
else { format = "name"; }
if (namefile != "") { format = "name"; }
else { format = "count"; }
}
}

if ((format != "name") && (format != "count")) {
m->mothurOut(format + " is not a valid format option. Options are count or name.");
if (countfile == "") { m->mothurOut("I will use name.\n"); format = "name"; }
if (countfile == "") { m->mothurOut("I will use count.\n"); format = "count"; }
else { m->mothurOut("I will use count.\n"); format = "count"; }
}

Expand Down Expand Up @@ -174,11 +171,8 @@ int DeconvoluteCommand::execute() {

if (m->getControl_pressed()) { return 0; }

ifstream in;
util.openInputFile(fastafile, in);

ofstream outFasta;
util.openOutputFile(outFastaFile, outFasta);
ifstream in; util.openInputFile(fastafile, in);
ofstream outFasta; util.openOutputFile(outFastaFile, outFasta);

map<string, string> sequenceStrings; //sequenceString -> list of names. "atgc...." -> seq1,seq2,seq3.
map<string, string>::iterator itStrings;
Expand Down Expand Up @@ -277,7 +271,7 @@ int DeconvoluteCommand::execute() {
else { util.openOutputFile(outCountFile, outNames); outputTypes["count"].push_back(outCountFile); outputNames.push_back(outCountFile); }

if ((countfile != "") && (format == "count")) { ct.printHeaders(outNames); }
else if ((countfile == "") && (format == "count")) { newCt.printHeaders(outNames); }
else if ((countfile == "") && (format == "count")) { newCt.printCompressedHeaders(outNames); }

for (int i = 0; i < nameFileOrder.size(); i++) {
if (m->getControl_pressed()) { outputTypes.clear(); util.mothurRemove(outFastaFile); outNames.close(); for (int j = 0; j < outputNames.size(); j++) { util.mothurRemove(outputNames[j]); } return 0; }
Expand All @@ -296,7 +290,7 @@ int DeconvoluteCommand::execute() {
}
}else {
if (countfile != "") { ct.printSeq(outNames, itStrings->second); }
else if (format == "count") { newCt.printSeq(outNames, itStrings->second); }
else if (format == "count") { newCt.printCompressedSeq(outNames, itStrings->second); }
}
}else{ m->mothurOut("[ERROR]: mismatch in namefile print.\n"); m->setControl_pressed(true); }
}
Expand Down
167 changes: 133 additions & 34 deletions source/datastructures/counttable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,30 @@
//

#include "counttable.h"
#include "groupmap.h"

/************************************************************/
//used by tree commands
//Builds the table from a seqName->group map. The distinct sequence names (map keys) and
//group names (map values) are gathered from g and handed, together with g itself, to the
//three-argument createTable overload.
//g - seqName->group
//Returns 0. The value returned by the three-argument overload is not inspected (same as before).
int CountTable::createTable(map<string, string>& g) {
    try {
        set<string> names; set<string> groups;

        for (map<string, string>::iterator it = g.begin(); it != g.end(); it++) {

            //stop collecting once the control flag is set; whatever was gathered so far is still passed on below
            if (m->getControl_pressed()) { break; }

            names.insert(it->first);
            groups.insert(it->second);
        }

        createTable(names, g, groups);

        //the function is declared int: flowing off the end without a return is undefined behaviour
        return 0;
    }
    catch(exception& e) {
        m->errorOut(e, "CountTable", "createTable");
        exit(1);
    }
}
/************************************************************/
//used by tree commands
int CountTable::createTable(set<string>& n, map<string, string>& g, set<string>& gs) {
Expand Down Expand Up @@ -962,45 +985,15 @@ vector<string> CountTable::printTable(string file, bool compressedFormat) {
//zeroed seqs are not printed
vector<string> CountTable::printCompressedTable(string file, vector<string> groupsToPrint) {
try {
ofstream out;
util.openOutputFile(file, out);
ofstream out; util.openOutputFile(file, out);

vector<string> namesInTable;

bool pickedGroups = false;
set<int> selectedGroupsIndicies;
if (groupsToPrint.size() != 0) { if (hasGroups) { pickedGroups = true; } } //if no groups selected, print all groups

set<int> selectedGroupsIndicies = printCompressedHeaders(out, groupsToPrint);

if (total != 0) {
if (hasGroups) {

map<int, string> reverse;
for (map<string, int>::iterator it = indexGroupMap.begin(); it !=indexGroupMap.end(); it++) { reverse[it->second] = it->first; }

map<int, string>::iterator it = reverse.begin();
string group1Name = it->second;
if (pickedGroups) { //find selected groups indicies
for (map<int, string>::iterator it = reverse.begin(); it != reverse.end(); it++) {
if (util.inUsersGroups(it->second, groupsToPrint)) { group1Name = it->second; break; }
}
}

out << "#Compressed Format: groupIndex,abundance. For example 1,6 would mean the read has an abundance of 6 for group " + group1Name + "." << endl;
out << "#";

for (map<int, string>::iterator it = reverse.begin(); it != reverse.end(); it++) {
if (pickedGroups) { //find selected groups indicies
if (util.inUsersGroups(it->second, groupsToPrint)) {
selectedGroupsIndicies.insert(it->first);

out << it->first+1 << "," << it->second << "\t";
}
}else { out << it->first+1 << "," << it->second << "\t"; }
}
out << endl;
}

printHeaders(out, groupsToPrint);

map<int, string> reverse; //use this to preserve order
for (map<string, int>::iterator it = indexNameMap.begin(); it !=indexNameMap.end(); it++) { reverse[it->second] = it->first; }
Expand Down Expand Up @@ -1237,6 +1230,52 @@ int CountTable::printHeaders(ofstream& out, vector<string> selectedGroups) {
}
}
/************************************************************/
//Writes the header section of a compressed count table to out.
//Nothing is written when total is 0. When the table has groups, two '#' lines are written first:
//a format explanation that uses one group name as its example, and an "index,groupName" legend
//(indexes are printed 1-based). printHeaders(out, groupsToPrint) is then called in either case.
//out - stream to write to
//groupsToPrint - groups to restrict the legend to; if empty, every group is listed
//Returns the 0-based indexes of the groups picked via groupsToPrint. The set is empty when no
//groups were requested, when the table has no groups, or when total is 0.
set<int> CountTable::printCompressedHeaders(ofstream& out, vector<string> groupsToPrint) {
    try {
        set<int> chosenIndexes;

        if (total == 0) { return chosenIndexes; }

        if (hasGroups) {
            //an empty request means "print all groups"
            const bool filterGroups = (groupsToPrint.size() != 0);

            //index -> group name, so groups are visited in index order
            map<int, string> indexToGroup;
            for (map<string, int>::iterator g = indexGroupMap.begin(); g != indexGroupMap.end(); g++) {
                indexToGroup[g->second] = g->first;
            }

            //group named in the example sentence: first requested group if filtering, otherwise the first group
            string exampleGroup = indexToGroup.begin()->second;
            if (filterGroups) {
                for (map<int, string>::iterator entry = indexToGroup.begin(); entry != indexToGroup.end(); entry++) {
                    if (util.inUsersGroups(entry->second, groupsToPrint)) { exampleGroup = entry->second; break; }
                }
            }

            out << "#Compressed Format: groupIndex,abundance. For example 1,6 would mean the read has an abundance of 6 for group " + exampleGroup + "." << endl;
            out << "#";

            for (map<int, string>::iterator entry = indexToGroup.begin(); entry != indexToGroup.end(); entry++) {
                if (filterGroups) {
                    if (!util.inUsersGroups(entry->second, groupsToPrint)) { continue; }
                    chosenIndexes.insert(entry->first);
                }
                out << entry->first+1 << "," << entry->second << "\t";
            }
            out << endl;
        }

        printHeaders(out, groupsToPrint);

        return chosenIndexes;
    }
    catch(exception& e) {
        m->errorOut(e, "CountTable", "printCompressedHeaders");
        exit(1);
    }
}
/************************************************************/
int CountTable::printSeq(ofstream& out, string seqName) {
try {
map<string, int>::iterator it = indexNameMap.find(seqName);
Expand All @@ -1258,7 +1297,67 @@ int CountTable::printSeq(ofstream& out, string seqName) {
exit(1);
}
}

/************************************************************/
//Writes one sequence's row of a compressed count table to out.
//Row layout: seqName <tab> total, followed (when the table has groups) by one
//<tab>groupIndex,abundance entry per stored group count. Group indexes are printed 1-based.
//out - stream to write to
//seqName - sequence to print; if it is not in the table an error is reported and the control flag is set
//groupsToPrint - groups to restrict the row to; if empty, all stored group counts are printed
//Nothing is written for a sequence whose total is 0, or whose abundance across the selected groups is 0.
//Each printed row is terminated by exactly one newline.
//Returns 0 in all cases.
int CountTable::printCompressedSeq(ofstream& out, string seqName, vector<string> groupsToPrint) {
    try {
        map<string, int>::iterator itName = indexNameMap.find(seqName);
        if (itName == indexNameMap.end()) {
            m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->setControl_pressed(true);
            return 0;
        }

        int i = itName->second;
        if (totals[i] == 0) { return 0; } //zeroed seqs are not printed

        if (!hasGroups) {
            out << itName->first << '\t' << totals[i] << endl;
        }
        else if (groupsToPrint.size() == 0) { //no groups selected, print all groups
            out << itName->first << '\t' << totals[i];

            for (size_t j = 0; j < counts[i].size(); j++) {
                out << '\t' << counts[i][j].group+1 << ',' << counts[i][j].abund;
            }
            out << endl;
        }
        else {
            //find selected groups indicies
            set<int> selectedGroupsIndicies;
            for (map<string, int>::iterator it = indexGroupMap.begin(); it != indexGroupMap.end(); it++) {
                if (util.inUsersGroups(it->first, groupsToPrint)) { selectedGroupsIndicies.insert(it->second); }
            }

            string groupOutput = "";
            long long thisTotal = 0;
            for (size_t j = 0; j < counts[i].size(); j++) {
                if (selectedGroupsIndicies.count(counts[i][j].group) != 0) { //this is a group we want
                    groupOutput += '\t' + toString(counts[i][j].group+1) + ',' + toString(counts[i][j].abund);
                    thisTotal += counts[i][j].abund;
                }
            }

            //only print the row (and its single newline) when the selected groups contain this sequence;
            //previously a second endl was always written after this branch, leaving blank lines in the file
            if (thisTotal != 0) {
                out << itName->first << '\t' << thisTotal << groupOutput << endl;
            }
        }

        return 0;
    }
    catch(exception& e) {
        m->errorOut(e, "CountTable", "printCompressedSeq");
        exit(1);
    }
}
/************************************************************/
//group counts for a seq
vector<int> CountTable::getGroupCounts(string seqName) {
Expand Down
4 changes: 3 additions & 1 deletion source/datastructures/counttable.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,6 @@

#include "mothurout.h"
#include "listvector.hpp"
#include "groupmap.h"
#include "sequence.hpp"
#include "sharedrabundvectors.hpp"

Expand All @@ -66,6 +65,7 @@ class CountTable {
~CountTable() {}

//reads and creates smart enough to eliminate groups with zero counts
int createTable(map<string, string>&); //seqName->group
int createTable(set<string>&, map<string, string>&, set<string>&); //seqNames, seqName->group, groupNames
int createTable(string, string, vector<string>, bool createGroup=false); //namefile, groupfile, selectedGroups, createGroup,
int readTable(string, bool, bool); //filename, readGroups, mothurRunning
Expand All @@ -89,8 +89,10 @@ class CountTable {
vector<string> printTable(string, bool compress); //preserves order in original, printing compressed or not based on compress flag pasted in
vector<string> printSortedTable(string); //sorted by seqName
int printHeaders(ofstream&, vector<string> optionalGroups=nullVector);
set<int> printCompressedHeaders(ofstream&, vector<string> optionalGroups=nullVector);
vector<string> getHardCodedHeaders(); //Representative_Sequence, total
int printSeq(ofstream&, string);
int printCompressedSeq(ofstream&, string, vector<string> optionalGroups=nullVector);

bool testGroups(string file); //used to check if file has group data without reading it
bool testGroups(string file, vector<string>&); //used to check if file has group data without reading it, return groups if found.
Expand Down

0 comments on commit d1d3574

Please sign in to comment.