Skip to content

Commit

Permalink
Adds validFastqGZFiles function
Browse files Browse the repository at this point in the history
  • Loading branch information
mothur-westcott committed Dec 3, 2021
1 parent 7324187 commit 603f4be
Show file tree
Hide file tree
Showing 7 changed files with 211 additions and 5 deletions.
28 changes: 26 additions & 2 deletions source/commands/listseqscommand.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -346,8 +346,8 @@ int ListSeqsCommand::execute(){

//read functions fill names vector
if (fastafiles.size() != 0) { process(fastafiles, names, &readFasta); }
if (qualityfiles.size() != 0) { process(qualityfiles, names, &readQual); }
if (fastqfiles.size() != 0) { process(fastqfiles, names, &readFastq); }
if (qualityfiles.size() != 0) { process(qualityfiles, names, &readQual); }
if (fastqfiles.size() != 0) { process(fastqfiles, names, "fastq", &readFastq); }
if (namefiles.size() != 0) { process(namefiles, names, &readNameTaxGroup); }
if (groupfiles.size() != 0) { process(groupfiles, names, &readNameTaxGroup); }
if (taxfiles.size() != 0) { process(taxfiles, names, &readNameTaxGroup); }
Expand Down Expand Up @@ -388,6 +388,30 @@ int ListSeqsCommand::execute(){
}
}
//**********************************************************************************************************************
// Opens each file in `files` in turn and hands the open stream to the reader
// callback `f`, together with the shared `names` set and the MothurOut handle.
//
//   files   - input files to read, visited in order
//   names   - set passed to `f` for every file (the read functions fill it)
//   isFastq - not read by this overload yet.
//             NOTE(review): presumably intended to select compressed-file
//             handling for fastq input - confirm before relying on it.
//   f       - reader invoked once per opened file
//
// The loop stops early as soon as the control-pressed flag is raised.
// Any std::exception is reported through errorOut and the program exits.
void ListSeqsCommand::process(vector<string> files, set<string>& names, string isFastq, void f(set<string>&, ifstream&, MothurOut*&)){
    try {
        Utils util;

        vector<string>::iterator fileIt = files.begin();
        while (fileIt != files.end()) {
            // abort requested: leave the remaining files untouched
            if (m->getControl_pressed()) { break; }

            // record which file is currently being processed
            inputFileName = *fileIt;

            ifstream in;
            util.openInputFile(inputFileName, in);

            f(names, in, m);

            in.close();
            ++fileIt;
        }
    }
    catch(exception& e) {
        m->errorOut(e, "ListSeqsCommand", "process");
        exit(1);
    }
}
//**********************************************************************************************************************
void ListSeqsCommand::process(vector<string> files, set<string>& names, void f(set<string>&, ifstream&, MothurOut*&)){
try {
Utils util;
Expand Down
2 changes: 2 additions & 0 deletions source/commands/listseqscommand.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ class ListSeqsCommand : public Command {
bool abort;

void process(vector<string> files, set<string>&, void f(set<string>&, ifstream&, MothurOut*&));
void process(vector<string> files, set<string>&, string, void f(set<string>&, ifstream&, MothurOut*&));

};

#endif
Expand Down
3 changes: 1 addition & 2 deletions source/datastructures/filefile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,7 @@ vector< vector<string> > FileFile::read(string f, string mode){

bool allGZ = true; bool allPlainTxt = true;

ifstream in;
util.openInputFile(filename, in);
ifstream in; util.openInputFile(filename, in);

while(!in.eof()) {

Expand Down
120 changes: 120 additions & 0 deletions source/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -677,6 +677,126 @@ bool Utils::checkLocations(string& filename, vector< vector<string> > locations)
}
}
/***********************************************************************/
//function assumes files are all gz
//locations[0] = inputdir paths, locations[1] = outputdirPaths, locations[2] = mothur's exe path, locations[3] = mothur tools paths, locations[4] = mothur_files paths
// Searches a series of candidate locations for a gz file by trying to open it.
//
// The name is first expanded with getFullPathName() and opened as given. If
// that fails, the file's simple name is appended, in this order, to:
//   1. each non-empty input directory        (locations[0])
//   2. the first output directory, if set    (locations[1])
//   3. each non-empty MOTHUR_FILES path      (locations[4])
//   4. mothur's executable directory         (locations[2], first entry or "")
//   5. each non-empty MOTHUR_TOOLS path      (locations[3])
// The search stops at the first candidate that opens.
//
//   filename  - in/out. Replaced by its full path name, then by every
//               candidate path that is tried. On success it holds the path
//               that opened; on failure it holds the last candidate tried.
//   locations - indexed 0..4 as listed above, so it must have five entries.
//
// Returns true when some candidate opened, false otherwise.
// Without USE_BOOST gz input cannot be read: an error is logged, the
// control-pressed flag is set and false is returned.
bool Utils::checkLocationsGZ(string& filename, vector< vector<string> > locations){
    try {
        filename = getFullPathName(filename);

        // `locations` is this function's own by-value copy, so binding
        // references avoids five further vector copies and stays valid
        // for the whole call.
        const vector<string>& inputDirs = locations[0];
        const vector<string>& outputDirs = locations[1];
        const vector<string>& mothurPaths = locations[2];
        const vector<string>& mothurToolsPaths = locations[3];
        const vector<string>& mothurFilesPaths = locations[4];

        ifstream in; bool ableToOpen;

        #ifdef USE_BOOST
            boost::iostreams::filtering_istream inBoost;
            ableToOpen = openInputFileBinary(filename, in, inBoost, "noerror");
        #else
            m->mothurOut("[ERROR]: cannot read gz format without enabling boost libraries.\n"); m->setControl_pressed(true); return false;
        #endif


        //if you can't open it, try input location
        if (!ableToOpen) {
            for (size_t i = 0; i < inputDirs.size(); i++) {
                string inputDir = inputDirs[i];

                if (inputDir != "") { //default path is set
                    string tryPath = inputDir + getSimpleName(filename);
                    m->mothurOut("Unable to open " + filename + ". Trying input directory " + tryPath+ ".\n");

                    ifstream in2;
                    #ifdef USE_BOOST
                        boost::iostreams::filtering_istream inBoost2;
                        ableToOpen = openInputFileBinary(tryPath, in2, inBoost2, "noerror");
                    #endif
                    filename = tryPath;

                    if (ableToOpen) { break; }
                }
            }
        }

        //if you can't open it, try output location
        if (!ableToOpen) {
            string outputDir = ""; if (outputDirs.size() != 0) { outputDir = outputDirs[0]; }
            if (outputDir != "") { //default path is set
                string tryPath = outputDir + getSimpleName(filename);
                m->mothurOut("Unable to open " + filename + ". Trying output directory " + tryPath+ ".\n");
                ifstream in2;
                #ifdef USE_BOOST
                    boost::iostreams::filtering_istream inBoost2;
                    ableToOpen = openInputFileBinary(tryPath, in2, inBoost2, "noerror");
                #endif
                filename = tryPath;
            }
        }

        //if you can't open it, try default (MOTHUR_FILES) locations
        if (!ableToOpen) {
            for (size_t i = 0; i < mothurFilesPaths.size(); i++) {
                string defaultPath = mothurFilesPaths[i];

                if (defaultPath != "") { //default path is set
                    string tryPath = defaultPath + getSimpleName(filename);
                    m->mothurOut("Unable to open " + filename + ". Trying MOTHUR_FILES directory " + tryPath+ ".\n");
                    ifstream in2;
                    #ifdef USE_BOOST
                        boost::iostreams::filtering_istream inBoost2;
                        ableToOpen = openInputFileBinary(tryPath, in2, inBoost2, "noerror");
                    #endif
                    filename = tryPath;

                    if (ableToOpen) { break; }
                }
            }
        }

        //still not found: try mothur's executable location.
        //This step runs even when no executable path is known; the simple
        //name is then tried on its own.
        if (!ableToOpen) {
            string mothurPath = ""; if (mothurPaths.size() != 0) { mothurPath = mothurPaths[0]; }
            string tryPath = mothurPath + getSimpleName(filename);
            m->mothurOut("Unable to open " + filename + ". Trying mothur's executable directory " + tryPath+ ".\n");
            ifstream in2;
            #ifdef USE_BOOST
                boost::iostreams::filtering_istream inBoost2;
                ableToOpen = openInputFileBinary(tryPath, in2, inBoost2, "noerror");
            #endif
            filename = tryPath;
        }

        //last resort: try the MOTHUR_TOOLS locations
        if (!ableToOpen) {
            for (size_t i = 0; i < mothurToolsPaths.size(); i++) {
                string defaultPath = mothurToolsPaths[i];

                if (defaultPath != "") { //default path is set
                    string tryPath = defaultPath + getSimpleName(filename);
                    m->mothurOut("Unable to open " + filename + ". Trying MOTHUR_TOOLS directory " + tryPath+ ".\n");
                    ifstream in2;
                    #ifdef USE_BOOST
                        boost::iostreams::filtering_istream inBoost2;
                        ableToOpen = openInputFileBinary(tryPath, in2, inBoost2, "noerror");
                    #endif
                    filename = tryPath;

                    if (ableToOpen) { break; }
                }
            }
        }

        if (!ableToOpen) { m->mothurOut("Unable to open " + filename + ".\n"); return false; }

        return true;
    }
    catch(exception& e) {
        // report this function's own name so failures are attributed correctly
        m->errorOut(e, "Utils", "checkLocationsGZ");
        exit(1);
    }
}
/***********************************************************************/
bool Utils::checkLocations(string& filename, vector< vector<string> > locations, string silent){
try {
filename = getFullPathName(filename);
Expand Down
1 change: 1 addition & 0 deletions source/utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ class Utils {
bool checkSpecificLocations(string&, vector<string>, string silent);
bool checkLocations(string&, vector< vector<string> >, string silent);
bool checkLocations(string&, vector< vector<string> >); //filename, locations to check. Returns false if cant be found. If found completes name with location
bool checkLocationsGZ(string&, vector< vector<string> >);
bool dirCheckWritable(string&); //completes path, appends appropriate / or \, makes sure dir is writable.
bool dirCheckExists(string&);
bool dirCheckExists(string&, bool); //completes path, appends appropriate / or \, makes sure dir is present.
Expand Down
59 changes: 59 additions & 0 deletions source/validparameter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,65 @@ vector<string> ValidParameters::validFiles(map<string, string>& container, strin
}
}
/******************************************************/
// Validates a dash-separated list of fastq files given for `parameter` and
// reports whether the whole list is gz compressed.
//
//   container - parameter name -> value map. A value of "none" is normalised
//               to "NONE". For every file that opens, container[parameter]
//               is overwritten with that file's resolved name, so it ends up
//               holding the last file that opened.
//   parameter - key to look up in `container`
//   gz        - out. Set to true only when every listed file was detected as
//               gz; false for plain text, a mixed list, a missing parameter
//               or "NONE". Always false when built without USE_BOOST.
//
// Returns the resolved names of the files that could be opened. The result is
// empty when the parameter is missing or "NONE", and holds the single entry
// "not open" when files were listed but none opened.
//
// A mixed list of gz and plain-text files logs an error and sets the
// control-pressed flag; the files are then treated as plain text.
vector<string> ValidParameters::validFastqGZFiles(map<string, string>& container, string parameter, bool& gz) {
    try {
        vector<string> vFiles;
        Utils util;
        bool openedAtLeastOne = false; bool allGZ = true; bool allPlainTxt = true;

        map<string, string>::iterator it = container.find(parameter);

        //no parameter given
        if (it == container.end()) { gz = false; return vFiles; }

        if ((it->second == "NONE") || (it->second == "none")) {
            it->second = "NONE"; //ignore
            gz = false; //always assign the out-parameter so callers never read an unset value
            return vFiles;
        }

        vector<string> files; util.splitAtDash(it->second, files);

        //check for gz
        //NOTE(review): isGZ sees the names before removeQuotes() and before the
        //location search below resolves them - confirm that is intended.
        for (size_t i = 0; i < files.size(); i++) {
            #ifdef USE_BOOST
                if (util.isGZ(files[i])[1]) { allPlainTxt = false; }
                else { allGZ = false; }

                if (!allGZ && !allPlainTxt) { //mixed bag of files, uh oh...
                    m->mothurOut("[ERROR]: Your files must all be in compressed .gz form or all in plain text form. Please correct. \n"); m->setControl_pressed(true);
                    break; //the verdict cannot change any more; report the problem once
                }
            #else
                allGZ=false;
            #endif
        }

        gz = allGZ;

        for (size_t i = 0; i < files.size(); i++) {

            files[i] = util.removeQuotes(files[i]);
            string filename = files[i];

            bool opened;
            if (!gz)    { opened = util.checkLocations(filename, current->getLocations());   }
            else        { opened = util.checkLocationsGZ(filename, current->getLocations()); }

            if (opened) {
                vFiles.push_back(filename); container[parameter] = filename; openedAtLeastOne = true;

                //check for blank file - only for the file that was just opened, so
                //an earlier file is never re-tested and blamed on the current name.
                //The file stays in the returned list, as before.
                if (util.isBlank(container[parameter])) { m->mothurOut("[ERROR]: " + filename + " is blank, skipping.\n"); }
            }
            else { m->mothurOut("Unable to open " + filename + ", skipping.\n"); }
        }

        if (!openedAtLeastOne) { vFiles.push_back("not open"); }

        return vFiles;

    }
    catch(exception& e) {
        // report this function's own name so failures are attributed correctly
        m->errorOut(e, "ValidParameters", "validFastqGZFiles");
        exit(1);
    }
}
/******************************************************/
string ValidParameters::validFile(map<string, string>& container, string parameter) {
try {
bool ableToOpen = false;
Expand Down
3 changes: 2 additions & 1 deletion source/validparameter.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ class ValidParameters {
bool isValidParameter(string, vector<string>, string);
vector <string> addParameters(string[], int);
void initParameterRanges();
vector<string> validFiles(map<string, string>&, string);
vector<string> validFiles(map<string, string>&, string);
vector<string> validFastqGZFiles(map<string, string>&, string, bool&);
string validFile(map<string, string>&, string);
string valid(map<string, string>&, string);
string validPath(map<string, string>&, string);
Expand Down

0 comments on commit 603f4be

Please sign in to comment.