Permalink
Browse files

Fixed issue with nameMap in OptiMatrix read (readPhylip and readColumn)

  • Loading branch information...
1 parent 61e5529 commit 9ca9b2af415c074f48c740b4d25142138697e71e @mothur-westcott mothur-westcott committed Dec 12, 2016
@@ -11,9 +11,9 @@
#include "readphylip.h"
#include "readcolumn.h"
#include "readmatrix.hpp"
-
#include "sequence.hpp"
#include "systemcommand.h"
+#include "sensspeccommand.h"
//**********************************************************************************************************************
vector<string> ClusterCommand::setParameters(){
@@ -889,7 +889,7 @@ int ClusterCommand::runOptiCluster(){
long long numBins = cluster.getNumBins();
m->mothurOut("\n\niter\ttime\tlabel\tnum_otus\tcutoff\ttp\ttn\tfp\tfn\tsensitivity\tspecificity\tppv\tnpv\tfdr\taccuracy\tmcc\tf1score\n");
outStep << "iter\ttime\tlabel\tnum_otus\tcutoff\ttp\ttn\tfp\tfn\tsensitivity\tspecificity\tppv\tnpv\tfdr\taccuracy\tmcc\tf1score\n";
- long long tp, tn, fp, fn;
+ long long tp, tn, fp, fn;
vector<double> results = cluster.getStats(tp, tn, fp, fn);
m->mothurOut("0\t0\t" + toString(cutoff) + "\t" + toString(numBins) + "\t"+ toString(cutoff) + "\t" + toString(tp) + "\t" + toString(tn) + "\t" + toString(fp) + "\t" + toString(fn) + "\t");
outStep << "0\t0\t" + toString(cutoff) + "\t" + toString(numBins) + "\t" + toString(cutoff) + "\t" << tp << '\t' << tn << '\t' << fp << '\t' << fn << '\t';
@@ -654,16 +654,18 @@ map<double, int> ClusterSplitCommand::completeListFile(vector<string> listNames,
if (countfile != "") { m->getline(in); m->gobble(in); }
while (!in.eof()) {
- in >> firstCol >> secondCol; m->getline(in); m->gobble(in);
+ in >> firstCol >> secondCol;
+ m->getline(in);
if (countfile == "") { listSingle->push_back(secondCol); }
else { listSingle->push_back(firstCol); }
+ m->gobble(in);
}
in.close();
m->mothurRemove(singleton);
numSingleBins = listSingle->getNumBins();
- }else{ listSingle = NULL; numSingleBins = 0; }
+ }else{ listSingle = NULL; numSingleBins = 0; }
//go through users set and make them floats so we can sort them
double tcutoff = cutoff * 1000; tcutoff = ceil(tcutoff);
@@ -51,11 +51,12 @@ class ClusterSplitCommand : public Command {
vector<string> outputNames;
string file, method, fileroot, tag, outputDir, phylipfile, columnfile, namefile, countfile, distfile, format, showabund, timing, splitmethod, taxFile, fastafile, inputDir, vsearchLocation, metric, initialize;
- double cutoff, splitcutoff, stableMetric, numSingletons;
+ double cutoff, splitcutoff, stableMetric;
int precision, length, processors, taxLevelCutoff, maxIters;
bool print_start, abort, large, classic, runCluster, deleteFiles, isList, cutoffNotSet, makeDist;
time_t start;
ofstream outList, outRabund, outSabund;
+ long long numSingletons;
void printData(ListVector*);
vector<string> createProcesses(vector< map<string, string> >, set<string>&);
@@ -240,7 +240,7 @@ int DistanceCommand::execute(){
if (m->control_pressed) { return 0; }
int numSeqs = alignDB.getNumSeqs();
- cutoff += 0.005;
+ //cutoff += 0.005;
if (!alignDB.sameLength()) { m->mothurOut("[ERROR]: your sequences are not the same length, aborting."); m->mothurOutEndLine(); return 0; }
@@ -215,12 +215,21 @@ int OptiMatrix::readPhylip(){
closeness.resize(nonSingletonCount);
+ map<string, string> names;
+ if (namefile != "") { m->readNames(namefile, names); }
+
Progress* reading;
ifstream in;
m->openInputFile(distFile, in);
in >> nseqs >> name;
+ int newA = singletonIndexSwap[0];
+ if (namefile != "") {
+ name = names[name]; //redundant names
+ }
+ nameMap[newA] = name;
+
int fivepercent = (int)(0.05 * nseqs);
string line = "";
@@ -246,6 +255,11 @@ int OptiMatrix::readPhylip(){
int newA = singletonIndexSwap[i];
closeness[newA].insert(newB);
closeness[newB].insert(newA);
+
+ if (namefile != "") {
+ name = names[name]; //redundant names
+ }
+ nameMap[newA] = name;
}
index++; reading->update(index);
}
@@ -278,6 +292,11 @@ int OptiMatrix::readPhylip(){
int newA = singletonIndexSwap[i];
closeness[newA].insert(newB);
closeness[newB].insert(newA);
+
+ if (namefile != "") {
+ name = names[name]; //redundant names
+ }
+ nameMap[newA] = name;
}
index++; reading->update(index);
}
@@ -297,23 +316,6 @@ int OptiMatrix::readPhylip(){
if (m->debug) { unsigned long long ramUsed = m->getRAMUsed(); unsigned long long total = m->getTotalRAM();
m->mothurOut("\nCurrent RAM usage: " + toString(ramUsed/(double)GIG) + " Gigabytes. Total Ram: " + toString(total/(double)GIG) + " Gigabytes.\n"); }
- if (namefile != "") {
- map<string, string> names;
- m->readNames(namefile, names);
- //update nameMap
- for (int i = 0; i < nameMap.size(); i++) {
- map<string, string>::iterator it = names.find(nameMap[i]);
- nameMap[i] = it->second; //we know its there because we read it above
- }
- names.clear();
- }
-
- for (int i = 0; i < closeness.size(); i++) {
- string newName = nameMap[i];
- int newIndex = singletonIndexSwap[i];
- nameMap[newIndex] = newName;
- }
-
return 0;
}
@@ -389,6 +391,9 @@ int OptiMatrix::readColumn(){
closeness.resize(nonSingletonCount);
+ map<string, string> names;
+ if (namefile != "") { m->readNames(namefile, names); }
+
while(in){ //let's assume it's a triangular matrix...
in >> firstName; m->gobble(in);
@@ -416,29 +421,19 @@ int OptiMatrix::readColumn(){
int newA = singletonIndexSwap[indexA];
closeness[newA].insert(newB);
closeness[newB].insert(newA);
+
+ if (namefile != "") {
+ firstName = names[firstName]; //redundant names
+ secondName = names[secondName]; //redundant names
+ }
+
+ nameMap[newA] = firstName;
+ nameMap[newB] = secondName;
}
}
in.close();
nameAssignment.clear();
- if (namefile != "") {
- map<string, string> names;
- m->readNames(namefile, names);
- //update nameMap
- for (int i = 0; i < nameMap.size(); i++) {
- map<string, string>::iterator it = names.find(nameMap[i]);
- nameMap[i] = it->second; //we know its there because we read it above
- }
- names.clear();
- }
-
-
- for (int i = 0; i < closeness.size(); i++) {
- string newName = nameMap[i];
- int newIndex = singletonIndexSwap[i];
- nameMap[newIndex] = newName;
- }
-
return 1;
}
View
@@ -171,10 +171,7 @@ bool OptiCluster::update(double& listMetric) {
bins[binNumber].erase(remove(bins[binNumber].begin(), bins[binNumber].end(), seqNumber), bins[binNumber].end()); //remove from old bin i
}
- if (usedInsert) {
- if (bins[binNumber].size() == 0) { insertLocation = binNumber; } //set flag if old bin is empty.
- insertLocation = findInsert();
- }
+ if (usedInsert) { insertLocation = findInsert(); }
//update seqBins
seqBin[seqNumber] = bestBin; //set new OTU location
View
@@ -23,7 +23,7 @@ class OptiCluster : public Cluster {
#endif
public:
- OptiCluster(OptiMatrix* mt, string met, double ns) : Cluster() {
+ OptiCluster(OptiMatrix* mt, string met, long long ns) : Cluster() {
//Stores the arguments: mt -> matrix, met -> metric, ns -> numSingletons, and grabs the MothurOut instance.
//The truePositives/trueNegatives/falseNegatives/falsePositives counters all start at zero.
m = MothurOut::getInstance(); matrix = mt; metric = met; truePositives = 0; trueNegatives = 0; falseNegatives = 0; falsePositives = 0; numSingletons = ns;
}
~OptiCluster() {} //empty body: no explicit cleanup here (the OptiMatrix pointer stored in 'matrix' is not deleted)
@@ -26,7 +26,7 @@ class SplitMatrix {
int split();
vector< map<string, string> > getDistanceFiles(); //returns map of distance files -> namefile sorted by distance file size
string getSingletonNames() { return singleton; } //returns namesfile containing singletons
- int getNumSingleton() { return numSingleton; } //returns namesfile containing singletons
+ long long getNumSingleton() { return numSingleton; } //returns the value of numSingleton (the singleton count), not a file name
private:
MothurOut* m;
@@ -35,7 +35,8 @@ class SplitMatrix {
vector< map< string, string> > dists;
float cutoff, distCutoff;
bool large, classic;
- int processors, numSingleton;
+ int processors;
+ long long numSingleton;
int splitDistance();
int splitClassify();

0 comments on commit 9ca9b2a

Please sign in to comment.