Skip to content
Browse files

Dead code removal

  • Loading branch information...
1 parent b6a40a8 commit ddee4c7542a6ed38f56b99a918a44c59a17cd779 @jts committed Mar 12, 2013
Showing with 2 additions and 510 deletions.
  1. +1 −18 src/SGA/assemble.cpp
  2. +1 −374 src/StringGraph/SGVisitors.cpp
  3. +0 −118 src/StringGraph/SGVisitors.h
View
19 src/SGA/assemble.cpp
@@ -79,13 +79,12 @@ namespace opt
static int maxIndelLength = 20;
//
- static int coverageCutoff = 0;
static bool bValidate;
static bool bExact = true;
static bool bPerformTR = false;
}
-static const char* shortopts = "p:o:m:d:g:b:a:c:r:x:l:sv";
+static const char* shortopts = "p:o:m:d:g:b:a:r:x:l:sv";
enum { OPT_HELP = 1, OPT_VERSION, OPT_VALIDATE, OPT_EDGESTATS, OPT_EXACT, OPT_MAXINDEL, OPT_TR, OPT_MAXEDGES };
@@ -97,7 +96,6 @@ static const struct option longopts[] = {
{ "cut-terminal", required_argument, NULL, 'x' },
{ "min-branch-length", required_argument, NULL, 'l' },
{ "resolve-small", required_argument, NULL, 'r' },
- { "coverage", required_argument, NULL, 'c' },
{ "max-divergence", required_argument, NULL, 'd' },
{ "max-gap-divergence", required_argument, NULL, 'g' },
{ "max-indel", required_argument, NULL, OPT_MAXINDEL },
@@ -136,10 +134,7 @@ void assemble()
// Visitor functors
SGTransitiveReductionVisitor trVisit;
SGGraphStatsVisitor statsVisit;
- SGRemodelVisitor remodelVisit;
- SGEdgeStatsVisitor edgeStatsVisit;
SGTrimVisitor trimVisit(opt::trimLengthThreshold);
-
SGContainRemoveVisitor containVisit;
SGValidateStructureVisitor validationVisit;
@@ -197,17 +192,6 @@ void assemble()
pGraph->visit(statsVisit);
}
- //
- if(opt::coverageCutoff > 0)
- {
- std::cout << "Coverage visit\n";
- SGCoverageVisitor coverageVisit(opt::coverageCutoff);
- pGraph->visit(coverageVisit);
- pGraph->visit(trimVisit);
- pGraph->visit(trimVisit);
- pGraph->visit(trimVisit);
- }
-
// Perform another round of simplification
pGraph->simplify();
@@ -262,7 +246,6 @@ void parseAssembleOptions(int argc, char** argv)
case 'g': arg >> opt::maxBubbleGapDivergence; break;
case 's': opt::bSmoothGraph = true; break;
case 'x': arg >> opt::numTrimRounds; break;
- case 'c': arg >> opt::coverageCutoff; break;
case 'r': arg >> opt::resolveSmallRepeatLen; break;
case OPT_MAXEDGES: arg >> opt::maxEdges; break;
case OPT_TR: opt::bPerformTR = true; break;
View
375 src/StringGraph/SGVisitors.cpp
@@ -25,22 +25,6 @@ bool SGFastaVisitor::visit(StringGraph* /*pGraph*/, Vertex* pVertex)
return false;
}
-
-//
-// SGOverlapWriterVisitor - write all the overlaps in the graph to a file
-//
-bool SGOverlapWriterVisitor::visit(StringGraph* /*pGraph*/, Vertex* pVertex)
-{
- EdgePtrVec edges = pVertex->getEdges();
- for(size_t i = 0; i < edges.size(); ++i)
- {
- Overlap ovr = edges[i]->getOverlap();
- if(ovr.id[0] < ovr.id[1])
- m_fileHandle << ovr << "\n";
- }
- return false;
-}
-
//
// SGTransitiveReductionVisitor - Perform a transitive reduction about this vertex
// This uses Myers' algorithm (2005, The fragment assembly string graph)
@@ -58,6 +42,7 @@ void SGTransitiveReductionVisitor::previsit(StringGraph* pGraph)
marked_edges = 0;
}
+//
bool SGTransitiveReductionVisitor::visit(StringGraph* /*pGraph*/, Vertex* pVertex)
{
size_t trans_count = 0;
@@ -315,228 +300,6 @@ bool SGValidateStructureVisitor::visit(StringGraph* pGraph, Vertex* pVertex)
}
//
-// SGRemodelVisitor - Remodel the graph to infer missing edges or remove erroneous edges
-//
-void SGRemodelVisitor::previsit(StringGraph* pGraph)
-{
- m_remodelER = 0.02;
- pGraph->setColors(GC_WHITE);
-}
-
-bool SGRemodelVisitor::visit(StringGraph* pGraph, Vertex* pVertex)
-{
- bool graph_changed = false;
-
- // Construct the set of overlaps reachable within the current parameters
- CompleteOverlapSet vertexOverlapSet(pVertex, m_remodelER, pGraph->getMinOverlap());
- SGAlgorithms::EdgeDescOverlapMap containMap;
- vertexOverlapSet.computeIrreducible(NULL, &containMap);
- SGAlgorithms::EdgeDescOverlapMap irreducibleMap = vertexOverlapSet.getOverlapMap();
-
- // Construct the set of edges that should be added
- EdgePtrVec edges = pVertex->getEdges();
- for(size_t i = 0; i < edges.size(); ++i)
- {
- SGAlgorithms::EdgeDescOverlapMap::iterator iter = irreducibleMap.find(edges[i]->getDesc());
- if(iter != irreducibleMap.end())
- {
- // Edge exists already
- irreducibleMap.erase(iter);
- }
- else
- {
- edges[i]->setColor(GC_BLACK);
- edges[i]->getTwin()->setColor(GC_BLACK);
- //std::cout << "Marking edge for deletion: " << edges[i]->getOverlap() << "\n";
- }
- }
-
- // Add remaining edges in the irreducible map
- SGAlgorithms::EdgeDescOverlapMap::iterator iter;
- for(iter = irreducibleMap.begin(); iter != irreducibleMap.end(); ++iter)
- {
- Overlap& ovr = iter->second;
- //std::cout << "Adding overlap: " << ovr << "\n";
- SGAlgorithms::createEdgesFromOverlap(pGraph, ovr, false);
- graph_changed = true;
- }
-
- // Update the containment flags in the graph to ensure that we can subsequently remove containment verts
- SGAlgorithms::updateContainFlags(pGraph, pVertex, containMap);
-
- return graph_changed;
-}
-
-//
-void SGRemodelVisitor::postvisit(StringGraph* pGraph)
-{
- pGraph->sweepEdges(GC_BLACK);
- pGraph->setErrorRate(m_remodelER);
-}
-
-//
-// SGEdgeStatsVisitor - Compute and display summary statistics of
-// the overlaps in the graph, including edges that were potentially missed
-//
-void SGEdgeStatsVisitor::previsit(StringGraph* pGraph)
-{
- pGraph->setColors(GC_WHITE);
- maxDiff = 0;
- minOverlap = pGraph->getMinOverlap();
- maxOverlap = 0;
-
-}
-
-bool SGEdgeStatsVisitor::visit(StringGraph* pGraph, Vertex* pVertex)
-{
- const int MIN_OVERLAP = pGraph->getMinOverlap();
- const double MAX_ERROR = pGraph->getErrorRate();
-
- static int visited = 0;
- ++visited;
- if(visited % 50000 == 0)
- std::cout << "visited: " << visited << "\n";
-
- // Add stats for the found overlaps
- EdgePtrVec edges = pVertex->getEdges();
- for(size_t i = 0; i < edges.size(); ++i)
- {
- Overlap ovr = edges[i]->getOverlap();
- int numDiff = ovr.match.countDifferences(pVertex->getStr(), edges[i]->getEnd()->getStr());
- int overlapLen = ovr.match.getMinOverlapLength();
- addOverlapToCount(overlapLen, numDiff, foundCounts);
- }
-
- // Explore the neighborhood around this graph for potentially missing overlaps
- CandidateVector candidates = getMissingCandidates(pGraph, pVertex, MIN_OVERLAP);
- MultiOverlap addedMO(pVertex->getID(), pVertex->getStr());
- for(size_t i = 0; i < candidates.size(); ++i)
- {
- Candidate& c = candidates[i];
- int numDiff = c.ovr.match.countDifferences(pVertex->getStr(), c.pEndpoint->getStr());
- double error_rate = double(numDiff) / double(c.ovr.match.getMinOverlapLength());
-
- if(error_rate < MAX_ERROR)
- {
- int overlapLen = c.ovr.match.getMinOverlapLength();
- addOverlapToCount(overlapLen, numDiff, missingCounts);
- }
- }
-
- return false;
-}
-
-//
-void SGEdgeStatsVisitor::postvisit(StringGraph* /*pGraph*/)
-{
- printf("FoundOverlaps\n");
- printCounts(foundCounts);
-
- printf("\nPotentially Missing Overlaps\n\n");
- printCounts(missingCounts);
-}
-
-//
-void SGEdgeStatsVisitor::printCounts(CountMatrix& matrix)
-{
- // Header row
- printf("OL\t");
- for(int j = 0; j <= maxDiff; ++j)
- {
- printf("%d\t", j);
- }
-
- printf("sum\n");
- IntIntMap columnTotal;
- for(int i = minOverlap; i <= maxOverlap; ++i)
- {
- printf("%d\t", i);
- int sum = 0;
- for(int j = 0; j <= maxDiff; ++j)
- {
- int v = matrix[i][j];
- printf("%d\t", v);
- sum += v;
- columnTotal[j] += v;
- }
- printf("%d\n", sum);
- }
-
- printf("total\t");
- int total = 0;
- for(int j = 0; j <= maxDiff; ++j)
- {
- int v = columnTotal[j];
- printf("%d\t", v);
- total += v;
- }
- printf("%d\n", total);
-}
-
-//
-void SGEdgeStatsVisitor::addOverlapToCount(int ol, int nd, CountMatrix& matrix)
-{
- matrix[ol][nd]++;
-
- if(nd > maxDiff)
- maxDiff = nd;
-
- if(ol > maxOverlap)
- maxOverlap = ol;
-}
-
-// Explore the neighborhood around a vertex looking for missing overlaps
-SGEdgeStatsVisitor::CandidateVector SGEdgeStatsVisitor::getMissingCandidates(StringGraph* /*pGraph*/,
- Vertex* pVertex,
- int minOverlap) const
-{
- CandidateVector out;
-
- // Mark the vertices that are reached from this vertex as black to indicate
- // they already are overlapping
- EdgePtrVec edges = pVertex->getEdges();
- for(size_t i = 0; i < edges.size(); ++i)
- {
- edges[i]->getEnd()->setColor(GC_BLACK);
- }
- pVertex->setColor(GC_BLACK);
-
- for(size_t i = 0; i < edges.size(); ++i)
- {
- Edge* pXY = edges[i];
- EdgePtrVec neighborEdges = pXY->getEnd()->getEdges();
- for(size_t j = 0; j < neighborEdges.size(); ++j)
- {
- Edge* pYZ = neighborEdges[j];
- if(pYZ->getEnd()->getColor() != GC_BLACK)
- {
- // Infer the overlap object from the edges
- Overlap ovrXY = pXY->getOverlap();
- Overlap ovrYZ = pYZ->getOverlap();
-
- if(SGAlgorithms::hasTransitiveOverlap(ovrXY, ovrYZ))
- {
- Overlap ovr_xz = SGAlgorithms::inferTransitiveOverlap(ovrXY, ovrYZ);
- if(ovr_xz.match.getMinOverlapLength() >= minOverlap)
- {
- out.push_back(Candidate(pYZ->getEnd(), ovr_xz));
- pYZ->getEnd()->setColor(GC_BLACK);
- }
- }
- }
- }
- }
-
- // Reset colors
- for(size_t i = 0; i < edges.size(); ++i)
- edges[i]->getEnd()->setColor(GC_WHITE);
- pVertex->setColor(GC_WHITE);
- for(size_t i = 0; i < out.size(); ++i)
- out[i].pEndpoint->setColor(GC_WHITE);
- return out;
-}
-
-//
// SGTrimVisitor - Remove "dead-end" vertices from the graph
//
void SGTrimVisitor::previsit(StringGraph* pGraph)
@@ -619,31 +382,6 @@ void SGDuplicateVisitor::postvisit(StringGraph* pGraph)
}
//
-// SGIslandVisitor - Remove island (unconnected) vertices
-//
-void SGIslandVisitor::previsit(StringGraph* pGraph)
-{
- pGraph->setColors(GC_WHITE);
-}
-
-// Mark any nodes that dont have edges
-bool SGIslandVisitor::visit(StringGraph* /*pGraph*/, Vertex* pVertex)
-{
- if(pVertex->countEdges() == 0)
- {
- pVertex->setColor(GC_BLACK);
- return true;
- }
- return false;
-}
-
-// Remove all the marked vertices
-void SGIslandVisitor::postvisit(StringGraph* pGraph)
-{
- pGraph->sweepVertices(GC_BLACK);
-}
-
-//
// Small repeat resolver - Remove edges induced from small (sub-read length)
// repeats
//
@@ -1079,67 +817,6 @@ void SGSmoothingVisitor::postvisit(StringGraph* pGraph)
printf("VariationSmoother: Removed %d simple and %d complex bubbles\n", m_simpleBubblesRemoved, m_complexBubblesRemoved);
}
-
-//
-// Coverage analysis
-//
-void SGCoverageVisitor::previsit(StringGraph* pGraph)
-{
- pGraph->setColors(GC_WHITE);
- m_numRemoved = 0;
-}
-
-//
-bool SGCoverageVisitor::visit(StringGraph* /*pGraph*/, Vertex* pVertex)
-{
- for(size_t idx = 0; idx < ED_COUNT; idx++)
- {
- EdgeDir dir = EDGE_DIRECTIONS[idx];
- EdgePtrVec edges = pVertex->getEdges(dir);
- if(edges.size() <= 1)
- continue;
-
- int bestCoverage = 0;
- int worstCoverage = std::numeric_limits<int>::max();
- int popIndex = -1;
-
- std::cout << "Bubble coverage:\n";
- for(size_t i = 0; i < edges.size(); ++i)
- {
- int coverage = SGSearch::countSpanningCoverage(edges[i], 20);
-
- std::cout << "\t" << i << " cov: " << coverage << "\n";
-
- if(coverage > bestCoverage)
- bestCoverage = coverage;
-
- if(coverage < worstCoverage)
- {
- worstCoverage = coverage;
- popIndex = i;
- }
- }
-
- if(worstCoverage > 0 && worstCoverage < m_cutoff && bestCoverage > m_cutoff)
- {
- assert(popIndex != -1);
- if(edges[popIndex]->getEnd()->getColor() != GC_RED)
- {
- edges[popIndex]->getEnd()->setColor(GC_RED);
- ++m_numRemoved;
- }
- }
- }
- return false;
-}
-
-//
-void SGCoverageVisitor::postvisit(StringGraph* pGraph)
-{
- pGraph->sweepVertices(GC_RED);
- printf("Removed %d low-coverage nodes\n", m_numRemoved);
-}
-
//
// SGGraphStatsVisitor - Collect summary statistics
// about the graph
@@ -1195,53 +872,3 @@ void SGGraphStatsVisitor::postvisit(StringGraph* /*pGraph*/)
num_island, num_terminal,
num_monobranch, num_dibranch, num_simple);
}
-
-//
-bool SGBreakWriteVisitor::visit(StringGraph* /*pGraph*/, Vertex* pVertex)
-{
- int s_count = pVertex->countEdges(ED_SENSE);
- int as_count = pVertex->countEdges(ED_ANTISENSE);
-
- if(s_count == 0 && as_count == 0)
- {
- writeBreak("ISLAND", pVertex);
- }
- else if(s_count == 0)
- {
- writeBreak("STIP", pVertex);
- }
- else if(as_count == 0)
- {
- writeBreak("ASTIP", pVertex);
- }
-
- if(s_count > 1)
- {
- std::stringstream text;
- text << "SBRANCHED," << calculateOverlapLengthDifference(pVertex, ED_SENSE);
- writeBreak(text.str(), pVertex);
- }
-
- if(as_count > 1)
- {
- std::stringstream text;
- text << "ASBRANCHED," << calculateOverlapLengthDifference(pVertex, ED_ANTISENSE);
- writeBreak(text.str(), pVertex);
- }
- return false;
-}
-
-int SGBreakWriteVisitor::calculateOverlapLengthDifference(const Vertex* pVertex, EdgeDir dir)
-{
- EdgePtrVec edges = pVertex->getEdges(dir);
- if(edges.size() < 2)
- return 0;
- int shortestLen = edges[edges.size() - 1]->getOverlap().getOverlapLength(0);
- int secondLen = edges[edges.size() - 2]->getOverlap().getOverlapLength(0);
- return secondLen - shortestLen;
-}
-
-void SGBreakWriteVisitor::writeBreak(const std::string& type, Vertex* pVertex)
-{
- *m_pWriter << "BREAK\t" << type << "\t" << pVertex->getID() << "\t" << pVertex->getSeq() << "\n";
-}
View
118 src/StringGraph/SGVisitors.h
@@ -30,21 +30,6 @@ struct SGFastaVisitor
std::ofstream m_fileHandle;
};
-// Visit each node and write the overlaps to the specified file
-struct SGOverlapWriterVisitor
-{
- SGOverlapWriterVisitor(std::string filename) : m_fileHandle(filename.c_str()) {}
- ~SGOverlapWriterVisitor() { m_fileHandle.close(); }
-
- // functions
- void previsit(StringGraph* /*pGraph*/) {}
- bool visit(StringGraph* pGraph, Vertex* pVertex);
- void postvisit(StringGraph* /*pGraph*/) {}
-
- // data
- std::ofstream m_fileHandle;
-};
-
// Run the Myers transitive reduction algorithm on each node
struct SGTransitiveReductionVisitor
{
@@ -87,17 +72,6 @@ struct SGValidateStructureVisitor
};
// Remodel the graph to infer missing edges or remove erroneous edges
-struct SGRemodelVisitor
-{
- SGRemodelVisitor() {}
- void previsit(StringGraph* pGraph);
- bool visit(StringGraph* pGraph, Vertex* pVertex);
- void postvisit(StringGraph*);
-
- double m_remodelER;
-};
-
-// Remodel the graph to infer missing edges or remove erroneous edges
struct SGSmallRepeatResolveVisitor
{
SGSmallRepeatResolveVisitor(int minDiff) : m_minDiff(minDiff) {}
@@ -120,38 +94,6 @@ struct SGOverlapRatioVisitor
double m_minRatio;
};
-
-// Compute edge summary statistics
-struct SGEdgeStatsVisitor
-{
- struct Candidate
- {
- Candidate(Vertex* pv, const Overlap& o) : pEndpoint(pv), ovr(o) {}
- Vertex* pEndpoint;
- Overlap ovr;
- };
- typedef std::vector<Candidate> CandidateVector;
- typedef std::map<int, int> IntIntMap;
- typedef std::map<int, IntIntMap> CountMatrix;
-
- //
- SGEdgeStatsVisitor() {}
- void previsit(StringGraph* pGraph);
- bool visit(StringGraph* pGraph, Vertex* pVertex);
- void postvisit(StringGraph*);
-
- CandidateVector getMissingCandidates(StringGraph* pGraph, Vertex* pVertex, int minOverlap) const;
- void addOverlapToCount(int ol, int nd, CountMatrix& matrix);
- void printCounts(CountMatrix& matrix);
-
- //
- CountMatrix foundCounts;
- CountMatrix missingCounts;
- int maxDiff;
- int minOverlap;
- int maxOverlap;
-};
-
// Detects and removes small "tip" vertices from the graph
// when they are less than minLength in size
struct SGTrimVisitor
@@ -178,37 +120,6 @@ struct SGDuplicateVisitor
bool m_bSilent;
};
-// Detect small island vertices and removal them
-struct SGIslandVisitor
-{
- SGIslandVisitor() {}
- void previsit(StringGraph* pGraph);
- bool visit(StringGraph* pGraph, Vertex* pVertex);
- void postvisit(StringGraph*);
-};
-
-
-// Detect whether vertices are bubbles and mark them for removal
-struct SGBubbleVisitor
-{
- SGBubbleVisitor() {}
- void previsit(StringGraph* pGraph);
- bool visit(StringGraph* pGraph, Vertex* pVertex);
- void postvisit(StringGraph*);
- int num_bubbles;
-};
-
-// Detect whether bubble edges and remove them
-struct SGBubbleEdgeVisitor
-{
- SGBubbleEdgeVisitor() {}
- void previsit(StringGraph* pGraph);
- bool visit(StringGraph* pGraph, Vertex* pVertex);
- void postvisit(StringGraph*);
-
- int num_bubbles;
-};
-
// Remove the edges of super-repetitive vertices in the graph
struct SGSuperRepeatVisitor
{
@@ -246,18 +157,6 @@ struct SGSmoothingVisitor
std::ofstream m_outFile;
};
-// Remove vertices/edges that have low coverage
-struct SGCoverageVisitor
-{
- SGCoverageVisitor(int cutoff) : m_cutoff(cutoff) {}
- void previsit(StringGraph* pGraph);
- bool visit(StringGraph* pGraph, Vertex* pVertex);
- void postvisit(StringGraph*);
-
- int m_cutoff;
- int m_numRemoved;
-};
-
// Compile summary statistics for the graph
struct SGGraphStatsVisitor
{
@@ -276,21 +175,4 @@ struct SGGraphStatsVisitor
size_t sum_edgeLen;
};
-// Write out any vertices that are going to be cause a contig to terminate
-// to the file
-struct SGBreakWriteVisitor
-{
- SGBreakWriteVisitor(const std::string& filename) { m_pWriter = createWriter(filename); }
- ~SGBreakWriteVisitor() { delete m_pWriter; }
-
- void previsit(StringGraph*) {}
- bool visit(StringGraph* pGraph, Vertex* pVertex);
- void postvisit(StringGraph*) {}
- void writeBreak(const std::string& type, Vertex* pVertex);
- int calculateOverlapLengthDifference(const Vertex* pVertex, EdgeDir dir);
-
- std::ostream* m_pWriter;
-
-};
-
#endif

0 comments on commit ddee4c7

Please sign in to comment.
Something went wrong with that request. Please try again.