Permalink
Browse files

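Fixes issue with the hard parameter in the cluster commands (the parameter is removed; distances are now always rounded up with ceilDist)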

  • Loading branch information...
1 parent 9959ced commit d018aabd3cbcd494aa9c4f966ee2b1d6e68a24be @mothur-westcott mothur-westcott committed Aug 8, 2016
@@ -51,7 +51,7 @@ vector<string> ClusterCommand::setParameters(){
string ClusterCommand::getHelpString(){
try {
string helpString = "";
- helpString += "The cluster command parameter options are phylip, column, name, count, method, cuttoff, hard, precision, sim, showabund, metric, delta, iters and timing. Fasta or Phylip or column and name are required.\n";
+ helpString += "The cluster command parameter options are phylip, column, name, count, method, cutoff, precision, sim, showabund and timing. Fasta or Phylip or column and name are required.\n";
//helpString += "The adjust parameter is used to handle missing distances. If you set a cutoff, adjust=f by default. If not, adjust=t by default. Adjust=f, means ignore missing distances and adjust cutoff as needed with the average neighbor method. Adjust=t, will treat missing distances as 1.0. You can also set the value the missing distances should be set to, adjust=0.5 would give missing distances a value of 0.5.\n";
helpString += "The phylip and column parameter allow you to enter your distance file. \n";
helpString += "The fasta parameter allows you to enter your fasta file for use with the agc or dgc methods. \n";
@@ -274,13 +274,6 @@ ClusterCommand::ClusterCommand(string option) {
temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; }
m->mothurConvert(temp, maxIters);
- //bool cutoffSet = false;
- temp = validParameter.validFile(parameters, "cutoff", false);
- if (temp == "not found") { temp = "1.0"; cutoffNotSet = true; }
- //else { cutoffSet = true; }
- m->mothurConvert(temp, cutoff);
- cutoff += (5 / (precision * 10.0));
-
//temp = validParameter.validFile(parameters, "adjust", false); if (temp == "not found") { temp = "F"; }
//if (m->isNumeric1(temp)) { m->mothurConvert(temp, adjust); }
//else if (m->isTrue(temp)) { adjust = 1.0; }
@@ -314,10 +307,10 @@ ClusterCommand::ClusterCommand(string option) {
if ((method == "agc") || (method == "dgc")) { m->mothurOut("[ERROR]: The agc and dgc clustering methods are not available for Windows, aborting\n."); abort = true; }
#endif
- //bool cutoffSet = false;
+ cutOffSet = false;
temp = validParameter.validFile(parameters, "cutoff", false);
if (temp == "not found") { temp = "10"; }
- //else { cutoffSet = true; }
+ else { cutOffSet = true; }
m->mothurConvert(temp, cutoff);
showabund = validParameter.validFile(parameters, "showabund", false);
@@ -842,8 +835,7 @@ int ClusterCommand::createRabund(CountTable*& ct, ListVector*& list, RAbundVecto
int ClusterCommand::runOptiCluster(){
try {
- if (cutoffNotSet) { m->mothurOut("\nYou did not set a cutoff, using 0.03.\n"); cutoff = 0.03; cutoff += (5 / (precision * 10.0)); }
- cutoff -= (5 / (precision * 10.0));
+ if (!cutOffSet) { m->mothurOut("\nYou did not set a cutoff, using 0.03.\n"); cutoff = 0.03; }
string nameOrCount = "";
string thisNamefile = "";
@@ -56,7 +56,7 @@ class ClusterCommand : public Command {
RAbundVector oldRAbund;
ListVector oldList;
- bool abort, hard, sim, cutoffNotSet;
+ bool abort, sim, cutOffSet;
string method, fileroot, tag, outputDir, phylipfile, columnfile, namefile, format, distfile, countfile, fastafile, inputDir, vsearchLocation, metric;
double cutoff, stableMetric;
@@ -34,8 +34,7 @@ vector<string> ClusterSplitCommand::setParameters(){
CommandParameter pprecision("precision", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pprecision);
CommandParameter pmethod("method", "Multiple", "furthest-nearest-average-weighted-agc-dgc-opti", "average", "", "", "","",false,false,true); parameters.push_back(pmethod);
CommandParameter pmetric("metric", "Multiple", "mcc-sens-spec-tptn-fpfn-tp-tn-fp-fn-f1score-accuracy-ppv-npv-fdr", "mcc", "", "", "","",false,false,true); parameters.push_back(pmetric);
- CommandParameter phard("hard", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(phard);
- CommandParameter pdist("dist", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pdist);
+ CommandParameter pdist("dist", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pdist);
CommandParameter pislist("islist", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pislist);
CommandParameter pclassic("classic", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pclassic);
CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed);
@@ -55,7 +54,7 @@ vector<string> ClusterSplitCommand::setParameters(){
string ClusterSplitCommand::getHelpString(){
try {
string helpString = "";
- helpString += "The cluster.split command parameter options are file, fasta, phylip, column, name, count, cutoff, precision, method, splitmethod, taxonomy, taxlevel, showabund, timing, hard, large, cluster, iters, delta, dist, processors. Fasta or Phylip or column and name are required.\n";
+ helpString += "The cluster.split command parameter options are file, fasta, phylip, column, name, count, cutoff, precision, method, splitmethod, taxonomy, taxlevel, showabund, timing, large, cluster, iters, delta, dist, processors. Fasta or Phylip or column and name are required.\n";
helpString += "The cluster.split command can split your files in 3 ways. Splitting by distance file, by classification, or by classification also using a fasta file. \n";
helpString += "For the distance file method, you need only provide your distance file and mothur will split the file into distinct groups. \n";
helpString += "For the classification method, you need to provide your distance file and taxonomy file, and set the splitmethod to classify. \n";
@@ -359,7 +358,7 @@ ClusterSplitCommand::ClusterSplitCommand(string option) {
//not using file option and don't have fasta method with classic
if (((splitmethod != "fasta") && classic) && (file == "")) { m->mothurOut("[ERROR]: splitmethod must be fasta to use cluster.classic, or you must use the file option.\n"); abort=true; }
-
+
temp = validParameter.validFile(parameters, "taxlevel", false); if (temp == "not found") { temp = "3"; }
m->mothurConvert(temp, taxLevelCutoff);
@@ -389,12 +388,9 @@ ClusterSplitCommand::ClusterSplitCommand(string option) {
#endif
cutoffNotSet = false;
-
temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { cutoffNotSet = true; temp = "1.0"; }
m->mothurConvert(temp, cutoff);
-
- if (method != "opti") { cutoff += (5 / (precision * 10.0)); }
-
+
if ((splitmethod == "distance") || (splitmethod == "classify") || (splitmethod == "fasta")) { }
else { m->mothurOut("[ERROR]: " + splitmethod + " is not a valid splitting method. Valid splitting algorithms are distance, classify or fasta."); m->mothurOutEndLine(); abort = true; }
@@ -1418,11 +1414,7 @@ string ClusterSplitCommand::runOptiCluster(string thisDistFile, string thisNamef
ListVector* list = cluster.getList();
list->setLabel(toString(smallestCutoff));
- if (hard) {
- cutoff = m->ceilDist(cutoff, precision);
- }else{
- cutoff = m->roundDist(cutoff, precision);
- }
+ cutoff = m->ceilDist(cutoff, precision);
labels.insert(toString(cutoff));
ofstream listFile;
@@ -1529,6 +1521,10 @@ int ClusterSplitCommand::vsearchDriver(string inputFile, string ucClusteredFile,
char* maxaccepts = new char[16]; maxaccepts[0] = '\0'; strncat(maxaccepts, "--maxaccepts=16", 15);
vsearchParameters.push_back(maxaccepts);
+ //--threads=1
+ char* threads = new char[12]; threads[0] = '\0'; strncat(threads, "--threads=1", 11);
+ vsearchParameters.push_back(threads);
+
//--usersort
char* usersort = new char[11]; usersort[0] = '\0'; strncat(usersort, "--usersort", 10);
vsearchParameters.push_back(usersort);
@@ -1550,10 +1546,6 @@ int ClusterSplitCommand::vsearchDriver(string inputFile, string ucClusteredFile,
char* wordlength = new char[15]; wordlength[0] = '\0'; strncat(wordlength, "--wordlength=8", 14);
vsearchParameters.push_back(wordlength);
- //--threads=1
- char* threads = new char[12]; threads[0] = '\0'; strncat(threads, "--threads=1", 11);
- vsearchParameters.push_back(threads);
-
//--uc=$ROOT.clustered.uc
string tempIn = "--uc=" + ucClusteredFile;
char* uc = new char[tempIn.length()+1]; uc[0] = '\0'; strncat(uc, tempIn.c_str(), tempIn.length());
@@ -53,8 +53,7 @@ class ClusterSplitCommand : public Command {
string file, method, fileroot, tag, outputDir, phylipfile, columnfile, namefile, countfile, distfile, format, showabund, timing, splitmethod, taxFile, fastafile, inputDir, vsearchLocation, metric;
double cutoff, splitcutoff, stableMetric, numSingletons;
int precision, length, processors, taxLevelCutoff, maxIters;
- bool print_start, abort, hard, large, classic, runCluster, deleteFiles, isList, cutoffNotSet, makeDist;
-
+ bool print_start, abort, large, classic, runCluster, deleteFiles, isList, cutoffNotSet, makeDist;
time_t start;
ofstream outList, outRabund, outSabund;
@@ -20,7 +20,6 @@ vector<string> MGClusterCommand::setParameters(){
CommandParameter pcutoff("cutoff", "Number", "", "0.70", "", "", "","",false,false,true); parameters.push_back(pcutoff);
CommandParameter pprecision("precision", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pprecision);
CommandParameter pmethod("method", "Multiple", "furthest-nearest-average", "average", "", "", "","",false,false); parameters.push_back(pmethod);
- CommandParameter phard("hard", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(phard);
CommandParameter pmin("min", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pmin);
CommandParameter pmerge("merge", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pmerge);
CommandParameter padjust("adjust", "String", "", "F", "", "", "","",false,false); parameters.push_back(padjust);
@@ -42,7 +41,7 @@ vector<string> MGClusterCommand::setParameters(){
string MGClusterCommand::getHelpString(){
try {
string helpString = "";
- helpString += "The mgcluster command parameter options are blast, name, cutoff, precision, hard, method, merge, min, length, penalty and adjust. The blast parameter is required.\n";
+ helpString += "The mgcluster command parameter options are blast, name, cutoff, precision, method, merge, min, length, penalty and adjust. The blast parameter is required.\n";
helpString += "The mgcluster command reads a blast and name file and clusters the sequences into OPF units similar to the OTUs.\n";
helpString += "This command outputs a .list, .rabund and .sabund file that can be used with mothur other commands to estimate richness.\n";
helpString += "The cutoff parameter is used to specify the maximum distance you would like to cluster to. The default is 0.70.\n";
@@ -190,8 +189,7 @@ MGClusterCommand::MGClusterCommand(string option) {
temp = validParameter.validFile(parameters, "cutoff", false);
if (temp == "not found") { temp = "0.70"; }
else { cutoffSet = true; }
- m->mothurConvert(temp, cutoff);
- cutoff += (5 / (precision * 10.0));
+ m->mothurConvert(temp, cutoff);
method = validParameter.validFile(parameters, "method", false);
if (method == "not found") { method = "average"; }
@@ -210,9 +208,6 @@ MGClusterCommand::MGClusterCommand(string option) {
temp = validParameter.validFile(parameters, "merge", false); if (temp == "not found") { temp = "true"; }
merge = m->isTrue(temp);
-
- temp = validParameter.validFile(parameters, "hard", false); if (temp == "not found") { temp = "T"; }
- hard = m->isTrue(temp);
temp = validParameter.validFile(parameters, "adjust", false); if (temp == "not found") { if (cutoffSet) { temp = "F"; }else { temp="T"; } }
if (m->isNumeric1(temp)) { m->mothurConvert(temp, adjust); }
@@ -342,12 +337,7 @@ int MGClusterCommand::execute(){
}
float dist = distMatrix->getSmallDist();
- float rndDist;
- if (hard) {
- rndDist = m->ceilDist(dist, precision);
- }else{
- rndDist = m->roundDist(dist, precision);
- }
+ float rndDist = m->ceilDist(dist, precision);
if(previousDist <= 0.0000 && dist != previousDist){
oldList.setLabel("unique");
@@ -433,9 +423,8 @@ int MGClusterCommand::execute(){
m->mothurOutEndLine();
if (saveCutoff != cutoff) {
- if (hard) { saveCutoff = m->ceilDist(saveCutoff, precision); }
- else { saveCutoff = m->roundDist(saveCutoff, precision); }
-
+ saveCutoff = m->ceilDist(saveCutoff, precision);
+
m->mothurOut("changed cutoff to " + toString(cutoff)); m->mothurOutEndLine();
}
@@ -56,7 +56,7 @@ class MGClusterCommand : public Command {
double cutoff;
float penalty, adjust;
int precision, length, precisionLength;
- bool abort, minWanted, hclusterWanted, merge, hard, cutoffSet;
+ bool abort, minWanted, hclusterWanted, merge, cutoffSet;
void printData(ListVector*, map<string, int>&);
ListVector* mergeOPFs(map<string, int>, float);
@@ -20,7 +20,6 @@ vector<string> SensSpecCommand::setParameters(){
CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
CommandParameter pcutoff("cutoff", "Number", "", "-1.00", "", "", "","",false,false); parameters.push_back(pcutoff);
CommandParameter pprecision("precision", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pprecision);
- CommandParameter phard("hard", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(phard);
CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed);
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
@@ -223,13 +222,6 @@ SensSpecCommand::SensSpecCommand(string option) {
outputDir += m->hasPath(listFile); //if user entered a file with a path then preserve it
}
- //check for optional parameter and set defaults
- // ...at some point should added some additional type checking...
- temp = validParameter.validFile(parameters, "hard", false);
- if (temp == "not found"){ hard = 1; }
- else if(!m->isTrue(temp)) { hard = 0; }
- else if(m->isTrue(temp)) { hard = 1; }
-
temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "-1.00"; }
m->mothurConvert(temp, cutoff);
@@ -310,7 +302,6 @@ int SensSpecCommand::process(map<string, int>& seqMap, ListVector*& list, bool&
if(label != "unique"){
origCutoff = label;
convert(label, cutoff);
- if(!hard){ cutoff += (0.49 / double(precision)); }
}
else{
origCutoff = "unique";
@@ -459,9 +450,8 @@ int SensSpecCommand::processListFile(){
string origCutoff = "";
bool getCutoff = 0;
- if(cutoff == -1.00) { getCutoff = 1; }
- else if( !hard ) { origCutoff = toString(cutoff); cutoff += (0.49 / double(precision)); }
- else { origCutoff = toString(cutoff); }
+ if(cutoff == -1.00) { getCutoff = 1; } //-1.00 is the default assigned when the cutoff parameter is "not found"
+ else { origCutoff = toString(m->ceilDist(cutoff, precision)); } //origCutoff is a string: convert explicitly, a bare numeric assignment would store a single truncated char
map<string, int> seqMap;
@@ -49,7 +49,6 @@ class SensSpecCommand : public Command {
long int truePositives, falsePositives, trueNegatives, falseNegatives;
bool abort, allLines, square;
- bool hard;
double cutoff;
int precision;

0 comments on commit d018aab

Please sign in to comment.