Permalink
Browse files

Adds runsensspec parameter to cluster.split

  • Loading branch information...
1 parent 6659a25 commit dbc624bbe7ebb3a5da669ccbbf273f7d1884e3d4 @mothur-westcott mothur-westcott committed Jan 31, 2017
@@ -25,6 +25,7 @@ vector<string> ClusterSplitCommand::setParameters(){
CommandParameter psplitmethod("splitmethod", "Multiple", "classify-fasta-distance", "distance", "", "", "","",false,false,true); parameters.push_back(psplitmethod);
CommandParameter plarge("large", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(plarge);
CommandParameter pshowabund("showabund", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pshowabund);
+ CommandParameter prunspenspec("runsensspec", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(prunspenspec);
CommandParameter pcluster("cluster", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pcluster);
CommandParameter ptiming("timing", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(ptiming);
CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
@@ -55,7 +56,7 @@ vector<string> ClusterSplitCommand::setParameters(){
string ClusterSplitCommand::getHelpString(){
try {
string helpString = "";
- helpString += "The cluster.split command parameter options are file, fasta, phylip, column, name, count, cutoff, precision, method, splitmethod, taxonomy, taxlevel, showabund, timing, large, cluster, iters, delta, initialize, dist, processors. Fasta or Phylip or column and name are required.\n";
+ helpString += "The cluster.split command parameter options are file, fasta, phylip, column, name, count, cutoff, precision, method, splitmethod, taxonomy, taxlevel, showabund, timing, large, cluster, iters, delta, initialize, dist, processors, runsensspec. Fasta or Phylip or column and name are required.\n";
helpString += "The cluster.split command can split your files in 3 ways. Splitting by distance file, by classification, or by classification also using a fasta file. \n";
helpString += "For the distance file method, you need only provide your distance file and mothur will split the file into distinct groups. \n";
helpString += "For the classification method, you need to provide your distance file and taxonomy file, and set the splitmethod to classify. \n";
@@ -75,6 +76,7 @@ string ClusterSplitCommand::getHelpString(){
helpString += "The metric parameter allows to select the metric in the opticluster method. Options are Matthews correlation coefficient (mcc), sensitivity (sens), specificity (spec), true positives + true negatives (tptn), false positives + false negatives (fpfn), true positives (tp), true negative (tn), false positive (fp), false negative (fn), f1score (f1score), accuracy (accuracy), positive predictive value (ppv), negative predictive value (npv), false discovery rate (fdr). Default=mcc.\n";
helpString += "The delta parameter allows to set the stable value for the metric in the opticluster method. Default=0.0001\n";
helpString += "The initialize parameter allows to select the initial randomization for the opticluster method. Options are singleton, meaning each sequence is randomly assigned to its own OTU, or oneotu meaning all sequences are assigned to one otu. Default=singleton.\n";
+ helpString += "The runsensspec parameter allows to run the sens.spec command on the completed list file. Default=true.\n";
helpString += "The method parameter allows you to enter your clustering mothod. Options are furthest, nearest, average, weighted, agc, dgc and opti. Default=opti. The agc and dgc methods require a fasta file.";
helpString += "The splitmethod parameter allows you to specify how you want to split your distance file before you cluster, default=distance, options distance, classify or fasta. \n";
helpString += "The taxonomy parameter allows you to enter the taxonomy file for your sequences, this is only valid if you are using splitmethod=classify. Be sure your taxonomy file does not include the probability scores. \n";
@@ -358,6 +360,9 @@ ClusterSplitCommand::ClusterSplitCommand(string option) {
temp = validParameter.validFile(parameters, "classic", false); if (temp == "not found") { temp = "F"; }
classic = m->isTrue(temp);
+ temp = validParameter.validFile(parameters, "runsensspec", false); if (temp == "not found") { temp = "T"; }
+ runsensSpec = m->isTrue(temp);
+
//not using file option and don't have fasta method with classic
if (((splitmethod != "fasta") && classic) && (file == "")) { m->mothurOut("[ERROR]: splitmethod must be fasta to use cluster.classic, or you must use the file option.\n"); abort=true; }
@@ -590,7 +595,7 @@ int ClusterSplitCommand::execute(){
m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to merge."); m->mothurOutEndLine();
- if (method == "opti") { runSensSpec(); }
+ if ((method == "opti") && (runsensSpec)) { runSensSpec(); }
if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
@@ -1662,7 +1667,7 @@ int ClusterSplitCommand::runSensSpec() {
}else if (columnfile != "") { columnFile = columnfile; }
else { phylipFile = phylipfile; }
- string inputString = "list=" + listFile;
+ string inputString = "cutoff=" + toString(cutoff) + ", list=" + listFile;
if (columnFile != "") { inputString += ", column=" + columnFile; }
else if (phylipfile != "") { inputString += ", phylip=" + phylipfile; }
else { m->mothurOut("[WARNING]: Cannot run sens.spec analysis without a phylip or column file, skipping."); return 0; }
@@ -53,7 +53,7 @@ class ClusterSplitCommand : public Command {
string file, method, fileroot, tag, outputDir, phylipfile, columnfile, namefile, countfile, distfile, format, showabund, timing, splitmethod, taxFile, fastafile, inputDir, vsearchLocation, metric, initialize;
double cutoff, splitcutoff, stableMetric;
int precision, length, processors, taxLevelCutoff, maxIters;
- bool print_start, abort, large, classic, runCluster, deleteFiles, isList, cutoffNotSet, makeDist;
+ bool print_start, abort, large, classic, runCluster, deleteFiles, isList, cutoffNotSet, makeDist, runsensSpec;
time_t start;
ofstream outList, outRabund, outSabund;
long long numSingletons;
@@ -419,7 +419,7 @@ int SensSpecCommand::process(ListVector*& list, bool& getCutoff, string& origCut
}
}
}
- cout << m->getRAMUsed()/(double)GIG << endl;
+
falseNegatives = distanceMap.size();
trueNegatives = numSeqs * (numSeqs-1)/2 - (falsePositives + falseNegatives + truePositives);

0 comments on commit dbc624b

Please sign in to comment.