Skip to content

Commit

Permalink
Adds ability to read compressed fastq files in list.seqs
Browse files Browse the repository at this point in the history
  • Loading branch information
mothur-westcott committed Dec 3, 2021
1 parent 603f4be commit 479813d
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 18 deletions.
52 changes: 42 additions & 10 deletions source/commands/listseqscommand.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ ListSeqsCommand::ListSeqsCommand(string option) : Command() {
else { current->setCountFile(countfiles[0]); }
}

fastqfiles = validParameter.validFiles(parameters, "fastq");
fastqfiles = validParameter.validFastqGZFiles(parameters, "fastq", gz);
if (fastqfiles.size() != 0) {
if (fastqfiles[0] == "not open") { abort = true; }
}
Expand All @@ -160,6 +160,33 @@ void addName(bool empty, string name, set<string>& names, set<string>& newNames)
if (names.count(name) != 0) { newNames.insert(name); } //present in files so far so add to newNames
}
}
#ifdef USE_BOOST
//**********************************************************************************************************************
// Gathers sequence names from fastq records read through a boost filtering stream.
//
// names   - in/out: names accumulated from files processed so far; overwritten with the
//           names gathered by this call once reading stops.
// inBoost - stream the fastq records are parsed from; read until eof is reported.
// m       - used to poll for a user interrupt and to report exceptions.
//
// Each record is parsed by FastqRead using the "illumina1.8+" format. Records that the
// parser flags to be ignored are skipped; every other record's name is handed to addName
// (presumably it either seeds the set or keeps only names already present - confirm
// against addName).
// Any std::exception is reported through m->errorOut and the process exits with status 1.
void readFastq(set<string>& names, boost::iostreams::filtering_istream& inBoost, MothurOut*& m){
    try {
        set<string> gathered;
        const bool empty = (names.size() == 0); // true when no earlier file has contributed names
        Utils util;

        // stop at end of stream, or as soon as the user interrupts the run
        while (!inBoost.eof() && !m->getControl_pressed()) {

            bool skipRecord;
            FastqRead fread(inBoost, skipRecord, "illumina1.8+");
            util.gobble(inBoost); // called after every record, before eof is tested again

            if (skipRecord) { continue; }

            addName(empty, fread.getName(), names, gathered);
        }

        // the caller's set is replaced even when the loop ended early on an interrupt
        names = gathered;
    }
    catch(exception& e) {
        m->errorOut(e, "ListSeqsCommand", "readFastq");
        exit(1);
    }
}
#endif
//**********************************************************************************************************************
void readFastq(set<string>& names, ifstream& in, MothurOut*& m){
try {
Expand Down Expand Up @@ -347,7 +374,7 @@ int ListSeqsCommand::execute(){
//read functions fill names vector
if (fastafiles.size() != 0) { process(fastafiles, names, &readFasta); }
if (qualityfiles.size() != 0) { process(qualityfiles, names, &readQual); }
if (fastqfiles.size() != 0) { process(fastqfiles, names, "fastq", &readFastq); }
if (fastqfiles.size() != 0) { process(fastqfiles, names); }
if (namefiles.size() != 0) { process(namefiles, names, &readNameTaxGroup); }
if (groupfiles.size() != 0) { process(groupfiles, names, &readNameTaxGroup); }
if (taxfiles.size() != 0) { process(taxfiles, names, &readNameTaxGroup); }
Expand Down Expand Up @@ -388,22 +415,27 @@ int ListSeqsCommand::execute(){
}
}
//**********************************************************************************************************************
void ListSeqsCommand::process(vector<string> files, set<string>& names, string isFastq, void f(set<string>&, ifstream&, MothurOut*&)){
void ListSeqsCommand::process(vector<string> files, set<string>& names){
try {
Utils util;

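//gz indicates whether the fastq files are compressed. If so, read them through a boost filtering stream; otherwise read them with a plain ifstream.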

for (int i = 0; i < files.size(); i++) {
if (m->getControl_pressed()) { break; }

inputFileName = files[i];

ifstream in; util.openInputFile(inputFileName, in);

f(names, in, m);

in.close();
if (!gz) {
ifstream in; util.openInputFile(inputFileName, in);
readFastq(names, in, m);
in.close();
}else {
#ifdef USE_BOOST
ifstream in; boost::iostreams::filtering_istream inBoost;
util.openInputFileBinary(inputFileName, in, inBoost);
readFastq(names, inBoost, m);
in.close(); inBoost.pop();
#endif
}
}
}
catch(exception& e) {
Expand Down
4 changes: 2 additions & 2 deletions source/commands/listseqscommand.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,10 @@ class ListSeqsCommand : public Command {
vector<string> outputNames;
vector<string> fastafiles, namefiles, groupfiles, countfiles, alignfiles, listfiles, taxfiles, fastqfiles, contigsreportfiles, qualityfiles;
string format, inputFileName;
bool abort;
bool abort, gz;

void process(vector<string> files, set<string>&);
void process(vector<string> files, set<string>&, void f(set<string>&, ifstream&, MothurOut*&));
void process(vector<string> files, set<string>&, string, void f(set<string>&, ifstream&, MothurOut*&));

};

Expand Down
12 changes: 6 additions & 6 deletions source/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -692,7 +692,7 @@ bool Utils::checkLocationsGZ(string& filename, vector< vector<string> > location

#ifdef USE_BOOST
boost::iostreams::filtering_istream inBoost;
ableToOpen = openInputFileBinary(filename, in, inBoost, "noerror");
ableToOpen = openInputFileBinary(filename, in, inBoost, "noerror"); in.close(); inBoost.pop();
#else
m->mothurOut("[ERROR]: cannot read gz format without enabling boost libraries.\n"); m->setControl_pressed(true); return false;
#endif
Expand All @@ -710,7 +710,7 @@ bool Utils::checkLocationsGZ(string& filename, vector< vector<string> > location
ifstream in2;
#ifdef USE_BOOST
boost::iostreams::filtering_istream inBoost2;
ableToOpen = openInputFileBinary(tryPath, in2, inBoost2, "noerror");
ableToOpen = openInputFileBinary(tryPath, in2, inBoost2, "noerror"); in2.close(); inBoost2.pop();
#endif
filename = tryPath;

Expand All @@ -728,7 +728,7 @@ bool Utils::checkLocationsGZ(string& filename, vector< vector<string> > location
ifstream in2;
#ifdef USE_BOOST
boost::iostreams::filtering_istream inBoost2;
ableToOpen = openInputFileBinary(tryPath, in2, inBoost2, "noerror");
ableToOpen = openInputFileBinary(tryPath, in2, inBoost2, "noerror"); in2.close(); inBoost2.pop();
#endif
filename = tryPath;
}
Expand All @@ -745,7 +745,7 @@ bool Utils::checkLocationsGZ(string& filename, vector< vector<string> > location
ifstream in2;
#ifdef USE_BOOST
boost::iostreams::filtering_istream inBoost2;
ableToOpen = openInputFileBinary(tryPath, in2, inBoost2, "noerror");
ableToOpen = openInputFileBinary(tryPath, in2, inBoost2, "noerror"); in2.close(); inBoost2.pop();
#endif
filename = tryPath;

Expand All @@ -762,7 +762,7 @@ bool Utils::checkLocationsGZ(string& filename, vector< vector<string> > location
ifstream in2;
#ifdef USE_BOOST
boost::iostreams::filtering_istream inBoost2;
ableToOpen = openInputFileBinary(tryPath, in2, inBoost2, "noerror");
ableToOpen = openInputFileBinary(tryPath, in2, inBoost2, "noerror"); in2.close(); inBoost2.pop();
#endif
filename = tryPath;
}
Expand All @@ -778,7 +778,7 @@ bool Utils::checkLocationsGZ(string& filename, vector< vector<string> > location
ifstream in2;
#ifdef USE_BOOST
boost::iostreams::filtering_istream inBoost2;
ableToOpen = openInputFileBinary(tryPath, in2, inBoost2, "noerror");
ableToOpen = openInputFileBinary(tryPath, in2, inBoost2, "noerror"); in2.close(); inBoost2.pop();
#endif
filename = tryPath;

Expand Down

0 comments on commit 479813d

Please sign in to comment.