Skip to content

Commit

Permalink
added fastq to list.seqs, get.seqs and remove.seqs. fixed bug where v…
Browse files Browse the repository at this point in the history
…enn command overwrote sharedotus files. fixed bug with sff.multiple setting processors=1 for future commands. Not using file redirects in commands it runs.
  • Loading branch information
mothur-westcott committed Oct 17, 2013
1 parent a935b75 commit 2ff2d03
Show file tree
Hide file tree
Showing 12 changed files with 350 additions and 81 deletions.
2 changes: 1 addition & 1 deletion commandoptionparser.cpp
Expand Up @@ -31,7 +31,7 @@ CommandOptionParser::CommandOptionParser(string input){
optionString = input.substr((openParen+1), (closeParen-openParen-1)); //optionString contains everything between "(" and ")".
}
else if (openParen == -1) { m->mothurOut("[ERROR]: You are missing ("); m->mothurOutEndLine(); }
else if (closeParen == -1) { m->mothurOut("[ERROR]:You are missing )"); m->mothurOutEndLine(); }
else if (closeParen == -1) { m->mothurOut("[ERROR]: You are missing )"); m->mothurOutEndLine(); }
}
catch(exception& e) {
m->errorOut(e, "CommandOptionParser", "CommandOptionParser");
Expand Down
88 changes: 85 additions & 3 deletions getseqscommand.cpp
Expand Up @@ -16,6 +16,7 @@
vector<string> GetSeqsCommand::setParameters(){
try {
CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none","fasta",false,false,true); parameters.push_back(pfasta);
CommandParameter pfastq("fastq", "InputTypes", "", "", "none", "FNGLT", "none","fastq",false,false,true); parameters.push_back(pfastq);
CommandParameter pname("name", "InputTypes", "", "", "NameCount", "FNGLT", "none","name",false,false,true); parameters.push_back(pname);
CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "FNGLT", "none","count",false,false,true); parameters.push_back(pcount);
CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "FNGLT", "none","group",false,false,true); parameters.push_back(pgroup);
Expand All @@ -42,9 +43,9 @@ vector<string> GetSeqsCommand::setParameters(){
string GetSeqsCommand::getHelpString(){
try {
string helpString = "";
helpString += "The get.seqs command reads an .accnos file and any of the following file types: fasta, name, group, count, list, taxonomy, quality or alignreport file.\n";
helpString += "The get.seqs command reads an .accnos file and any of the following file types: fasta, name, group, count, list, taxonomy, quality, fastq or alignreport file.\n";
helpString += "It outputs a file containing only the sequences in the .accnos file.\n";
helpString += "The get.seqs command parameters are accnos, fasta, name, group, list, taxonomy, qfile, alignreport and dups. You must provide accnos unless you have a valid current accnos file, and at least one of the other parameters.\n";
helpString += "The get.seqs command parameters are accnos, fasta, name, group, list, taxonomy, qfile, alignreport, fastq and dups. You must provide accnos unless you have a valid current accnos file, and at least one of the other parameters.\n";
helpString += "The dups parameter allows you to add the entire line from a name file if you add any name from the line. default=true. \n";
helpString += "The get.seqs command should be in the following format: get.seqs(accnos=yourAccnos, fasta=yourFasta).\n";
helpString += "Example get.seqs(accnos=amazon.accnos, fasta=amazon.fasta).\n";
Expand All @@ -64,6 +65,7 @@ GetSeqsCommand::GetSeqsCommand(){
setParameters();
vector<string> tempOutNames;
outputTypes["fasta"] = tempOutNames;
outputTypes["fastq"] = tempOutNames;
outputTypes["taxonomy"] = tempOutNames;
outputTypes["name"] = tempOutNames;
outputTypes["group"] = tempOutNames;
Expand All @@ -84,6 +86,7 @@ string GetSeqsCommand::getOutputPattern(string type) {
string pattern = "";

if (type == "fasta") { pattern = "[filename],pick,[extension]"; }
else if (type == "fastq") { pattern = "[filename],pick,[extension]"; }
else if (type == "taxonomy") { pattern = "[filename],pick,[extension]"; }
else if (type == "name") { pattern = "[filename],pick,[extension]"; }
else if (type == "group") { pattern = "[filename],pick,[extension]"; }
Expand Down Expand Up @@ -127,6 +130,7 @@ GetSeqsCommand::GetSeqsCommand(string option) {
//initialize outputTypes
vector<string> tempOutNames;
outputTypes["fasta"] = tempOutNames;
outputTypes["fastq"] = tempOutNames;
outputTypes["taxonomy"] = tempOutNames;
outputTypes["name"] = tempOutNames;
outputTypes["group"] = tempOutNames;
Expand Down Expand Up @@ -223,6 +227,14 @@ GetSeqsCommand::GetSeqsCommand(string option) {
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["count"] = inputDir + it->second; }
}

it = parameters.find("fastq");
//user has given a template file
if(it != parameters.end()){
path = m->hasPath(it->second);
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["fastq"] = inputDir + it->second; }
}
}


Expand Down Expand Up @@ -273,6 +285,10 @@ GetSeqsCommand::GetSeqsCommand(string option) {
if (qualfile == "not open") { abort = true; }
else if (qualfile == "not found") { qualfile = ""; }
else { m->setQualFile(qualfile); }

fastqfile = validParameter.validFile(parameters, "fastq", true);
if (fastqfile == "not open") { abort = true; }
else if (fastqfile == "not found") { fastqfile = ""; }

accnosfile2 = validParameter.validFile(parameters, "accnos2", true);
if (accnosfile2 == "not open") { abort = true; }
Expand All @@ -296,7 +312,7 @@ GetSeqsCommand::GetSeqsCommand(string option) {
string temp = validParameter.validFile(parameters, "dups", false); if (temp == "not found") { temp = "true"; usedDups = ""; }
dups = m->isTrue(temp);

if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (qualfile == "") && (accnosfile2 == "") && (countfile == "")) { m->mothurOut("You must provide one of the following: fasta, name, group, count, alignreport, taxonomy, quality or listfile."); m->mothurOutEndLine(); abort = true; }
if ((fastqfile == "") && (fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (qualfile == "") && (accnosfile2 == "") && (countfile == "")) { m->mothurOut("You must provide one of the following: fasta, name, group, count, alignreport, taxonomy, quality, fastq or listfile."); m->mothurOutEndLine(); abort = true; }

if (countfile == "") {
if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
Expand Down Expand Up @@ -333,6 +349,7 @@ int GetSeqsCommand::execute(){
//read through the correct file and output lines you want to keep
if (namefile != "") { readName(); }
if (fastafile != "") { readFasta(); }
if (fastqfile != "") { readFastq(); }
if (groupfile != "") { readGroup(); }
if (countfile != "") { readCount(); }
if (alignfile != "") { readAlign(); }
Expand Down Expand Up @@ -398,6 +415,71 @@ int GetSeqsCommand::execute(){
exit(1);
}
}
//**********************************************************************************************************************
int GetSeqsCommand::readFastq(){
try {
bool wroteSomething = false;
int selectedCount = 0;

ifstream in;
m->openInputFile(fastqfile, in);

string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(fastqfile); }
map<string, string> variables;
variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(fastqfile));
variables["[extension]"] = m->getExtension(fastqfile);
string outputFileName = getOutputFileName("fastq", variables);
ofstream out;
m->openOutputFile(outputFileName, out);


while(!in.eof()){

if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }

//read sequence name
string input = m->getline(in); m->gobble(in);

string outputString = input + "\n";

if (input[0] == '@') {
//get rest of lines
outputString += m->getline(in) + "\n"; m->gobble(in);
outputString += m->getline(in) + "\n"; m->gobble(in);
outputString += m->getline(in) + "\n"; m->gobble(in);

vector<string> splits = m->splitWhiteSpace(input);
string name = splits[0];
name = name.substr(1);
m->checkName(name);

if (names.count(name) != 0) {
wroteSomething = true;
selectedCount++;
out << outputString;
}
}

m->gobble(in);
}
in.close();
out.close();


if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine(); }
outputNames.push_back(outputFileName); outputTypes["fastq"].push_back(outputFileName);

m->mothurOut("Selected " + toString(selectedCount) + " sequences from your fastq file."); m->mothurOutEndLine();

return 0;

}
catch(exception& e) {
m->errorOut(e, "GetSeqsCommand", "readFastq");
exit(1);
}
}

//**********************************************************************************************************************
int GetSeqsCommand::readFasta(){
Expand Down
3 changes: 2 additions & 1 deletion getseqscommand.h
Expand Up @@ -36,13 +36,14 @@ class GetSeqsCommand : public Command {
private:
set<string> names;
vector<string> outputNames;
string accnosfile, accnosfile2, fastafile, namefile, countfile, groupfile, alignfile, listfile, taxfile, qualfile, outputDir;
string accnosfile, accnosfile2, fastafile, fastqfile, namefile, countfile, groupfile, alignfile, listfile, taxfile, qualfile, outputDir;
bool abort, dups;
map<string, string> uniqueMap;
//for debug
map<string, set<string> > sanity; //maps file type to names chosen for file. something like "fasta" -> vector<string>. If running in debug mode this is filled and we check to make sure all the files have the same names. If they don't we output the differences for the user.

int readFasta();
int readFastq();
int readName();
int readGroup();
int readCount();
Expand Down
67 changes: 64 additions & 3 deletions listseqscommand.cpp
Expand Up @@ -16,6 +16,7 @@
//**********************************************************************************************************************
vector<string> ListSeqsCommand::setParameters(){
try {
CommandParameter pfastq("fastq", "InputTypes", "", "", "FNGLT", "FNGLT", "none","accnos",false,false,true); parameters.push_back(pfastq);
CommandParameter pfasta("fasta", "InputTypes", "", "", "FNGLT", "FNGLT", "none","accnos",false,false,true); parameters.push_back(pfasta);
CommandParameter pname("name", "InputTypes", "", "", "FNGLT", "FNGLT", "none","accnos",false,false,true); parameters.push_back(pname);
CommandParameter pcount("count", "InputTypes", "", "", "FNGLT", "FNGLT", "none","accnos",false,false,true); parameters.push_back(pcount);
Expand All @@ -39,8 +40,8 @@ vector<string> ListSeqsCommand::setParameters(){
string ListSeqsCommand::getHelpString(){
try {
string helpString = "";
helpString += "The list.seqs command reads a fasta, name, group, count, list, taxonomy or alignreport file and outputs a .accnos file containing sequence names.\n";
helpString += "The list.seqs command parameters are fasta, name, group, count, list, taxonomy and alignreport. You must provide one of these parameters.\n";
helpString += "The list.seqs command reads a fasta, name, group, count, list, taxonomy, fastq or alignreport file and outputs a .accnos file containing sequence names.\n";
helpString += "The list.seqs command parameters are fasta, name, group, count, list, taxonomy, fastq and alignreport. You must provide one of these parameters.\n";
helpString += "The list.seqs command should be in the following format: list.seqs(fasta=yourFasta).\n";
helpString += "Example list.seqs(fasta=amazon.fasta).\n";
helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
Expand Down Expand Up @@ -169,6 +170,14 @@ ListSeqsCommand::ListSeqsCommand(string option) {
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["count"] = inputDir + it->second; }
}

it = parameters.find("fastq");
//user has given a template file
if(it != parameters.end()){
path = m->hasPath(it->second);
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["fastq"] = inputDir + it->second; }
}
}

//check for required parameters
Expand Down Expand Up @@ -205,8 +214,12 @@ ListSeqsCommand::ListSeqsCommand(string option) {
if (countfile == "not open") { abort = true; }
else if (countfile == "not found") { countfile = ""; }
else { m->setCountTableFile(countfile); }

fastqfile = validParameter.validFile(parameters, "fastq", true);
if (fastqfile == "not open") { abort = true; }
else if (fastqfile == "not found") { fastqfile = ""; }

if ((countfile == "") && (fastafile == "") && (namefile == "") && (listfile == "") && (groupfile == "") && (alignfile == "") && (taxfile == "")) { m->mothurOut("You must provide a file."); m->mothurOutEndLine(); abort = true; }
if ((fastqfile == "") && (countfile == "") && (fastafile == "") && (namefile == "") && (listfile == "") && (groupfile == "") && (alignfile == "") && (taxfile == "")) { m->mothurOut("You must provide a file."); m->mothurOutEndLine(); abort = true; }

int okay = 1;
if (outputDir != "") { okay++; }
Expand All @@ -230,6 +243,7 @@ int ListSeqsCommand::execute(){

//read functions fill names vector
if (fastafile != "") { inputFileName = fastafile; readFasta(); }
else if (fastqfile != "") { inputFileName = fastqfile; readFastq(); }
else if (namefile != "") { inputFileName = namefile; readName(); }
else if (groupfile != "") { inputFileName = groupfile; readGroup(); }
else if (alignfile != "") { inputFileName = alignfile; readAlign(); }
Expand Down Expand Up @@ -285,6 +299,53 @@ int ListSeqsCommand::execute(){
exit(1);
}
}
//**********************************************************************************************************************
// Reads fastqfile and appends the sequence name of every record to names.
// A record is a line starting with '@' followed by three more lines; the
// sequence name is the first whitespace-delimited token of the '@' line with
// the leading '@' removed, after m->checkName() has been applied.
// Lines that do not start with '@' are skipped.
// Returns 0, including when interrupted via m->control_pressed.
int ListSeqsCommand::readFastq(){
	try {

		ifstream in;
		m->openInputFile(fastqfile, in);

		int count = 1;

		while(!in.eof()){

			if (m->control_pressed) { in.close(); return 0; }

			//read sequence name
			string name = m->getline(in); m->gobble(in);

			if (name[0] == '@') {
				vector<string> splits = m->splitWhiteSpace(name);
				name = splits[0];
				name = name.substr(1);
				m->checkName(name);
				names.push_back(name);

				//skip the other three lines of the record. They are read and
				//discarded without touching name, so the debug output below
				//reports the sequence name rather than the last line read.
				m->getline(in); m->gobble(in);
				m->getline(in); m->gobble(in);
				m->getline(in); m->gobble(in);
			}

			m->gobble(in);
			if (m->debug) { count++; cout << "[DEBUG]: count = " + toString(count) + ", name = " + name + "\n"; }
		}
		in.close();

		return 0;

	}
	catch(exception& e) {
		m->errorOut(e, "ListSeqsCommand", "readFastq");
		exit(1);
	}
}

//**********************************************************************************************************************
int ListSeqsCommand::readFasta(){
Expand Down
4 changes: 2 additions & 2 deletions listseqscommand.h
Expand Up @@ -35,7 +35,7 @@ class ListSeqsCommand : public Command {

private:
vector<string> names, outputNames;
string fastafile, namefile, groupfile, countfile, alignfile, inputFileName, outputDir, listfile, taxfile;
string fastafile, namefile, groupfile, countfile, alignfile, inputFileName, outputDir, listfile, taxfile, fastqfile;
bool abort;

int readFasta();
Expand All @@ -45,7 +45,7 @@ class ListSeqsCommand : public Command {
int readList();
int readTax();
int readCount();
int readFastq();
};

#endif
Expand Down
58 changes: 58 additions & 0 deletions mothurout.cpp
Expand Up @@ -3013,6 +3013,64 @@ void MothurOut::getNumSeqs(ifstream& file, int& numSeqs){
}
}
/***********************************************************************/
// Tries to locate filename by attempting to open it, in order, at:
//   1. its full path name (getFullPathName),
//   2. inputDir + simple file name (only if inputDir is not empty),
//   3. getDefaultPath() + simple file name (only if a default path is set),
//   4. the directory derived from argv (mothur's executable location).
// filename is an in/out parameter: it is overwritten with the path of the
// most recent attempt, so on success it holds the path that could be opened.
// Returns true if one of the attempts opened the file, false otherwise.
// openInputFile(..., "noerror") is treated as returning 1 on failure.
bool MothurOut::checkLocations(string& filename, string inputDir){
	try {
		filename = getFullPathName(filename);

		int ableToOpen;
		ifstream in;
		ableToOpen = openInputFile(filename, in, "noerror");
		in.close();

		//if you can't open it, try input location
		if (ableToOpen == 1) {
			if (inputDir != "") { //input directory is set
				string tryPath = inputDir + getSimpleName(filename);
				mothurOut("Unable to open " + filename + ". Trying input directory " + tryPath); mothurOutEndLine();
				ifstream in2;
				ableToOpen = openInputFile(tryPath, in2, "noerror");
				in2.close();
				filename = tryPath;
			}
		}

		//if you can't open it, try default location
		if (ableToOpen == 1) {
			if (getDefaultPath() != "") { //default path is set
				string tryPath = getDefaultPath() + getSimpleName(filename);
				mothurOut("Unable to open " + filename + ". Trying default " + tryPath); mothurOutEndLine();
				ifstream in2;
				ableToOpen = openInputFile(tryPath, in2, "noerror");
				in2.close();
				filename = tryPath;
			}
		}

		//if you can't open it, it's not in the current working directory, inputDir or default path; try mothur's executable location
		if (ableToOpen == 1) {
			string exepath = argv;
			string tempPath = exepath;
			//lower-case copy so the search below matches 'm' or 'M'.
			//tolower() is undefined for negative char values (e.g. non-ASCII bytes in a path), so go through unsigned char.
			for (string::size_type i = 0; i < exepath.length(); i++) {
				tempPath[i] = static_cast<char>(tolower(static_cast<unsigned char>(exepath[i])));
			}
			//keep everything before the last 'm'/'M' (presumably the start of the executable's name - TODO confirm)
			exepath = exepath.substr(0, (tempPath.find_last_of('m')));

			string tryPath = getFullPathName(exepath) + getSimpleName(filename);
			mothurOut("Unable to open " + filename + ". Trying mothur's executable location " + tryPath); mothurOutEndLine();
			ifstream in2;
			ableToOpen = openInputFile(tryPath, in2, "noerror");
			in2.close();
			filename = tryPath;
		}

		if (ableToOpen == 1) { mothurOut("Unable to open " + filename + "."); mothurOutEndLine(); return false; }

		return true;
	}
	catch(exception& e) {
		errorOut(e, "MothurOut", "checkLocations");
		exit(1);
	}
}
/***********************************************************************/

//This function parses the estimator options and puts them in a vector
void MothurOut::splitAtChar(string& estim, vector<string>& container, char symbol) {
Expand Down
4 changes: 3 additions & 1 deletion mothurout.h
Expand Up @@ -97,7 +97,9 @@ class MothurOut {
int openOutputFileAppend(string, ofstream&);
int openOutputFileBinaryAppend(string, ofstream&);
int openInputFile(string, ifstream&);
int openInputFile(string, ifstream&, string); //no error given
int openInputFile(string, ifstream&, string); //no error given

bool checkLocations(string&, string); //filename, inputDir. checks for file in ./, inputdir, default and mothur's exe location. Returns false if cant be found. If found completes name with location
string getline(ifstream&);
string getline(istringstream&);
void gobble(istream&);
Expand Down

0 comments on commit 2ff2d03

Please sign in to comment.