Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Removing unused code

  • Loading branch information...
commit 7e4e632714c0a774c0c570e32b9622f4a6b165ac 1 parent c5d4abd
@jts authored
View
27 src/Bigraph/Vertex.cpp
@@ -303,33 +303,6 @@ MultiOverlap Vertex::getMultiOverlap() const
return mo;
}
-// Get a SeqTrie of the overlaps of this vertex
-void Vertex::fillTries(double p_error, SeqTrie* pSenseTrie, SeqTrie* pAntisenseTrie) const
-{
- double lp = log(p_error);
-
- if(m_edges.empty())
- return;
-
- for(size_t i = 0; i < m_edges.size(); ++i)
- {
- Edge* pEdge = m_edges[i];
- std::string overlapped = pEdge->getTwin()->getMatchStr();
- if(pEdge->getComp() == EC_REVERSE)
- overlapped = reverseComplement(overlapped);
-
- if(pEdge->getDir() == ED_SENSE)
- {
- overlapped = reverse(overlapped);
- pSenseTrie->insert(overlapped, lp);
- }
- else
- {
- pAntisenseTrie->insert(overlapped, lp);
- }
- }
-}
-
// Add an edge
void Vertex::addEdge(Edge* ep)
{
View
3  src/Bigraph/Vertex.h
@@ -86,9 +86,6 @@ class Vertex
// Get a multioverlap object representing the overlaps for this vertex
MultiOverlap getMultiOverlap() const;
- // Construct a trie from the edges, one for each direction
- void fillTries(double p_error, SeqTrie* pSenseTrie, SeqTrie* pAntisenseTrie) const;
-
// Edge list operations
void addEdge(Edge* ep);
void removeEdge(Edge* pEdge);
View
47 src/StringGraph/SGAlgorithms.cpp
@@ -349,53 +349,6 @@ bool SGAlgorithms::hasTransitiveOverlap(const Overlap& ovrXY, const Overlap& ovr
return Match::doMatchesIntersect(match_yx, match_yz);
}
-// Construct an extended multioverlap for a vertex
-MultiOverlap SGAlgorithms::makeExtendedMultiOverlap(const StringGraph* pGraph, const Vertex* pVertex)
-{
- CompleteOverlapSet overlapSet(pVertex, pGraph->getErrorRate(), 1);
- EdgeDescOverlapMap overlapMap = overlapSet.getOverlapMap();
-
- MultiOverlap mo(pVertex->getID(), pVertex->getSeq().toString());
- for(EdgeDescOverlapMap::const_iterator iter = overlapMap.begin();
- iter != overlapMap.end(); ++iter)
- {
- mo.add(iter->first.pVertex->getSeq().toString(), iter->second);
- }
- return mo;
-}
-
-//
-void SGAlgorithms::makeExtendedSeqTries(const StringGraph* pGraph, const Vertex* pVertex, double p_error, SeqTrie* pLeftTrie, SeqTrie* pRightTrie)
-{
- double lp = log(p_error);
- CompleteOverlapSet overlapSet(pVertex, pGraph->getErrorRate(), 1);
- EdgeDescOverlapMap overlapMap = overlapSet.getOverlapMap();
-
- for(EdgeDescOverlapMap::const_iterator iter = overlapMap.begin();
- iter != overlapMap.end(); ++iter)
- {
- // Coord[0] of the match is wrt pVertex, coord[1] is the other read
- std::string overlapped = iter->second.match.coord[1].getSubstring(iter->first.pVertex->getSeq().toString());
- if(iter->second.match.isRC())
- overlapped = reverseComplement(overlapped);
-
- if(iter->second.match.coord[0].isRightExtreme())
- {
- overlapped = reverse(overlapped);
- pRightTrie->insert(overlapped, lp);
- }
- else if(iter->second.match.coord[0].isLeftExtreme())
- {
- pLeftTrie->insert(overlapped, lp);
- }
- else
- {
- // ignore substrings
- //assert(iter->second.match.coord[0].isLeftExtreme());
- }
- }
-}
-
//
EdgeDesc SGAlgorithms::getEdgeDescFromEdge(Edge* pEdge)
{
View
7 src/StringGraph/SGAlgorithms.h
@@ -78,13 +78,6 @@ void partitionTransitiveOverlaps(EdgeDescOverlapMap* pOverlapMap,
// This function removes any duplicates
void removeSubmaximalOverlaps(EdgeDescOverlapMap* pOverlapMap);
-// Construct an extended multioverlap for a vertex
-MultiOverlap makeExtendedMultiOverlap(const StringGraph* pGraph, const Vertex* pVertex);
-
-// Construct SeqTries from the extended overlap set
-void makeExtendedSeqTries(const StringGraph* pGraph, const Vertex* pVertex,
- double p_error, SeqTrie* pLeftTrie, SeqTrie* pRightTrie);
-
// Simple getters for std::transform
EdgeDesc getEdgeDescFromEdge(Edge* pEdge);
EdgeDesc getEdgeDescFromPair(const EdgeDescOverlapPair& pair);
View
1  src/StringGraph/SGDebugAlgorithms.cpp
@@ -9,7 +9,6 @@
//
#include "SGDebugAlgorithms.h"
#include "SGAlgorithms.h"
-#include "SeqTrie.h"
#include <algorithm>
//
View
41 src/Util/ContigGraph.cpp
@@ -1,41 +0,0 @@
-#include "ContigGraph.h"
-
-//
-// Build the vertices
-//
-void loadVertices(ContigGraph& graph, int /*kmer*/, std::string filename)
-{
- std::ifstream file(filename.c_str());
- assert(file.is_open());
- Contig c;
- while(readCAF(file,c))
- {
- graph.addVertex(new ContigVertex(c.getID(), c));
- }
-}
-
-//
-// Build the edges
-//
-void loadEdges(ContigGraph& graph, int overlap, std::string filename)
-{
- std::ifstream file(filename.c_str());
- assert(file.is_open());
- AdjInfo a;
- while(file >> a)
- {
- graph.addEdge(new Edge(a.from, a.to, (EdgeDir)a.dir, (EdgeComp)a.comp));
- }
-}
-
-//
-// Build the graph
-//
-ContigGraph* createContigGraph(int k, std::string contigsCAF, std::string adjCAF)
-{
- ContigGraph* pGraph = new ContigGraph;
- loadVertices(*pGraph, k, contigsCAF);
- loadEdges(*pGraph, k - 1, adjCAF);
- return pGraph;
-}
-
View
25 src/Util/ContigGraph.h
@@ -1,25 +0,0 @@
-#ifndef CONTIGGRAPH_H
-#define CONTIGGRAPH_H
-
-#include "Bigraph.h"
-#include "Contig.h"
-#include <cassert>
-#include <cerrno>
-#include <cstring>
-#include <cstdlib>
-#include <iostream>
-#include <fstream>
-#include <getopt.h>
-#include <sstream>
-#include <string>
-
-//
-// Typedefs
-//
-typedef Bigraph ContigGraph;
-
-void loadVertices(ContigGraph& graph, int /*kmer*/, std::string filename);
-void loadEdges(ContigGraph& graph, int overlap, std::string filename);
-ContigGraph* createContigGraph(int k, std::string contigsCAF, std::string adjCAF);
-
-#endif
View
2  src/Util/Makefile.am
@@ -20,8 +20,6 @@ libutil_a_SOURCES = \
MultiOverlap.h MultiOverlap.cpp \
QualityVector.h QualityVector.cpp \
Stats.h Stats.cpp \
- SeqTrie.h SeqTrie.cpp \
- SeqDAVG.h SeqDAVG.cpp \
Quality.h Quality.cpp \
PrimerScreen.h PrimerScreen.cpp \
BitVector.h BitVector.cpp \
View
1  src/Util/MultiOverlap.h
@@ -13,7 +13,6 @@
#include "Match.h"
#include "Pileup.h"
#include "DNADouble.h"
-#include "SeqTrie.h"
class MultiOverlap
{
View
214 src/Util/SeqDAVG.cpp
@@ -1,214 +0,0 @@
-//-----------------------------------------------
-// Copyright 2009 Wellcome Trust Sanger Institute
-// Written by Jared Simpson (js18@sanger.ac.uk)
-// Released under the GPL
-//-----------------------------------------------
-//
-//
-// SeqDAVG.cpp - Directed acyclic variant graph
-// for a fixed-length sequence. Contains all the variants
-// seen in the overlaps for a single string.
-//
-#include "SeqDAVG.h"
-#include <iostream>
-
-//
-// Link
-//
-SeqDAVG::Link::Link() : pNode(NULL), label('\0'), count(0), weight(0.0f)
-{
-
-}
-
-SeqDAVG::Link::Link(Node* p, char l) : pNode(p), label(l), count(0), weight(0.0f)
-{
-
-}
-
-void SeqDAVG::Link::increment()
-{
- ++count;
-}
-
-void SeqDAVG::Link::decrement()
-{
- --count;
-}
-
-void SeqDAVG::Link::addWeight(double w)
-{
- weight += w;
-}
-
-//
-// Node
-//
-SeqDAVG::Node::Node()
-{
-}
-
-//
-SeqDAVG::Node::~Node()
-{
-}
-
-// Return a pointer to the link with label otherwise NULL
-SeqDAVG::Link* SeqDAVG::Node::getLink(char label)
-{
- return SeqDAVG::find(pChildLinks, label);
-}
-
-// Create a new child node
-SeqDAVG::Link* SeqDAVG::Node::addLink(Node* pNode, double weight, char label)
-{
- Link* pLink = getLink(label);
- if(pLink == NULL)
- {
- pChildLinks.push_back(Link(pNode, label));
- pLink = &pChildLinks.back();
- }
- pLink->addWeight(weight);
- pLink->increment();
- return pLink;
-}
-
-// Output node in dot format
-void SeqDAVG::Node::writeDot(std::ostream& out) const
-{
- out << "\"" << this << "\" [label=\"\"];\n";
- for(LinkList::const_iterator iter = pChildLinks.begin(); iter != pChildLinks.end(); ++iter)
- {
- out << "\"" << this << "\" -> \"" << iter->pNode << "\" [label=\""
- << iter->label << "," << iter->weight << "\"];\n";
- }
-}
-
-//
-// SeqDAVG
-//
-
-//
-SeqDAVG::SeqDAVG()
-{
- m_pRoot = new Node();
-}
-
-//
-SeqDAVG::~SeqDAVG()
-{
- delete m_pRoot;
- for(size_t i = 0; i < m_data.size(); ++i)
- {
- for(LinkList::iterator iter = m_data[i].begin(); iter != m_data[i].end(); ++iter)
- delete iter->pNode;
- }
-}
-
-// insert the string s into the trie so that it is a child
-// of the node(s) at DEPTH. Children of the root (the first
-// nodes) are depth 0. If depth is higher than the deepest
-// node in the trie, this will do nothing.
-void SeqDAVG::insert(const std::string& s, double weight, size_t depth)
-{
- if(s.empty())
- return;
-
- // Expand the data vector if necessary
- if(depth + s.size() > m_data.size())
- {
- m_data.resize(depth + s.size());
- }
-
- // Create any new nodes
- for(size_t i = 0; i < s.size(); ++i)
- {
- size_t curr_depth = depth + i;
- char label = s[i];
-
- // Find the node; if it doesn't exist, create it
- Link* pNodeLink = find(m_data[curr_depth], label);
- if(pNodeLink == NULL)
- {
- // Create the new node
- Node* pNode = new Node;
- m_data[curr_depth].push_back(Link(pNode, label));
- }
- }
-
- // Create links between parent and children nodes
- // This starts at the node before the inserted string
- // and continues up to the last node of string
- char parentLabel = '*';
- char childLabel = '*';
-
- for(size_t i = 0; i <= s.size() && i + depth < m_data.size(); ++i)
- {
- if(i == s.size())
- childLabel = '*';
- else
- childLabel = s[i];
-
- int childDepth = depth + i;
- int parentDepth = childDepth - 1;
-
- LinkList childList = findList(m_data[childDepth], childLabel);
- assert(!childList.empty());
-
- // Add to parents or root
- if(parentDepth < 0)
- {
- for(LinkList::iterator childIter = childList.begin();
- childIter != childList.end(); ++childIter)
- m_pRoot->addLink(childIter->pNode, weight, childIter->label);
-
- }
- else
- {
- LinkList parentList = findList(m_data[parentDepth], parentLabel);
- for(LinkList::iterator parentIter = parentList.begin();
- parentIter != parentList.end(); ++parentIter)
- {
- for(LinkList::iterator childIter = childList.begin();
- childIter != childList.end(); ++childIter)
- {
- parentIter->pNode->addLink(childIter->pNode, weight, childIter->label);
- }
- }
- }
- parentLabel = childLabel;
- }
-}
-
-//
-SeqDAVG::Link* SeqDAVG::find(LinkList& list, char label)
-{
- for(LinkList::iterator iter = list.begin(); iter != list.end(); ++iter)
- if(iter->label == label)
- return &(*iter);
- return NULL;
-
-}
-
-SeqDAVG::LinkList SeqDAVG::findList(LinkList& list, char label)
-{
- LinkList out;
- for(LinkList::iterator iter = list.begin(); iter != list.end(); ++iter)
- if(label == '*' || iter->label == label)
- out.push_back(*iter);
- return out;
-}
-
-// Write the trie to a dot file
-void SeqDAVG::writeDot(std::string filename)
-{
- std::ofstream writer(filename.c_str());
- writer << "digraph G\n{\n";
- m_pRoot->writeDot(writer);
- for(size_t i = 0; i < m_data.size(); ++i)
- {
- for(LinkList::iterator iter = m_data[i].begin(); iter != m_data[i].end(); ++iter)
- iter->pNode->writeDot(writer);
- }
- writer << "}\n";
- writer.close();
-}
View
78 src/Util/SeqDAVG.h
@@ -1,78 +0,0 @@
-//-----------------------------------------------
-// Copyright 2009 Wellcome Trust Sanger Institute
-// Written by Jared Simpson (js18@sanger.ac.uk)
-// Released under the GPL
-//-----------------------------------------------
-//
-// SeqDAVG.h - Directed acyclic variant graph
-// for a fixed-length sequence. Contains all the variants
-// seen in the overlaps for a single string.
-//
-#ifndef SEQDAVG_H
-#define SEQDAVG_H
-
-#include "Util.h"
-#include <list>
-
-class SeqDAVG
-{
- public:
- // Internal datastructures
- class Node;
- struct Link
- {
- // functions
- Link();
- Link(Node* p, char l);
- void increment();
- void decrement();
- void addWeight(double w);
-
- // data
- Node* pNode;
- char label;
- int count;
- double weight;
- };
-
- typedef std::list<Link> LinkList;
-
- class Node
- {
- public:
- // functions
- Node();
- ~Node();
-
- Link* getLink(char label);
- Link* addLink(Node* pNode, double weight, char label);
- void writeDot(std::ostream& out) const;
-
- private:
-
- //data
- LinkList pChildLinks;
- };
-
- typedef std::list<Node*> NodePList;
-
- //
- SeqDAVG();
- ~SeqDAVG();
-
- //
- void insert(const std::string& s, double weight, size_t depth = 0);
-
- // I/O
- void writeDot(std::string filename);
-
-
- private:
- static Link* find(LinkList& list, char label);
- static LinkList findList(LinkList& list, char label);
-
- std::vector<LinkList> m_data;
- Node* m_pRoot;
-};
-
-#endif
View
331 src/Util/SeqTrie.cpp
@@ -1,331 +0,0 @@
-//-----------------------------------------------
-// Copyright 2009 Wellcome Trust Sanger Institute
-// Written by Jared Simpson (js18@sanger.ac.uk)
-// Released under the GPL
-//-----------------------------------------------
-//
-// SeqTrie.cpp - Sequence trie data structure
-//
-#include "SeqTrie.h"
-#include <iostream>
-#include <fstream>
-#include <math.h>
-
-//
-// Link
-//
-SeqTrie::Link::Link() : pNode(NULL), label('\0'), count(0), weight(0.0f)
-{
-
-}
-
-SeqTrie::Link::Link(Node* p, char l) : pNode(p), label(l), count(0), weight(0.0f)
-{
-
-}
-
-void SeqTrie::Link::increment()
-{
- ++count;
-}
-
-void SeqTrie::Link::decrement()
-{
- --count;
-}
-
-void SeqTrie::Link::addWeight(double w)
-{
- weight += w;
-}
-
-//
-// Node
-//
-SeqTrie::Node::Node(Node* pParent, char parentLabel)
-{
- parentLink.pNode = pParent;
- parentLink.label = parentLabel;
-}
-
-// Destructor, destroy the children of this node
-SeqTrie::Node::~Node()
-{
- for(LinkList::iterator iter = childLinks.begin(); iter != childLinks.end(); ++iter)
- delete iter->pNode;
-}
-
-// Return a pointer to the link with label otherwise NULL
-SeqTrie::Link* SeqTrie::Node::getLink(char label)
-{
- for(LinkList::iterator iter = childLinks.begin(); iter != childLinks.end(); ++iter)
- if(iter->label == label)
- return &(*iter);
- return NULL;
-}
-
-// Create a new child node
-SeqTrie::Link* SeqTrie::Node::createChild(char label)
-{
- Node* pChild = new Node(this, label);
- Link l(pChild, label);
- childLinks.push_back(l);
- return &childLinks.back();
-}
-
-// Score the string s against the trie, descending from this node
-void SeqTrie::Node::score(const std::string& s, double lp_correct,
- double lp_error,
- double lp_missing,
- size_t idx, const PathScore& curr, PathScoreVector& out)
-{
- if(s.size() == idx || childLinks.empty())
- {
- // Fill the rest of the path with missing node scores
- PathScore terminal = curr;
- terminal.path_corrected = terminal.path_sequence;
- for(size_t i = idx; i < s.size(); ++i)
- {
- terminal.path_corrected.append(1, s[i]);
- terminal.path_sequence.append(1, 'N');
- terminal.path_score += lp_missing;
- terminal.branch_score += lp_missing;
- terminal.probVector.push_back(lp_missing);
- }
- terminal.path_score += terminal.branch_score;
- out.push_back(terminal);
- }
-
- // Descend into each subtree
- for(LinkList::iterator iter = childLinks.begin(); iter != childLinks.end(); ++iter)
- {
- PathScore branch = curr;
- if(iter->label == s[idx])
- {
- branch.path_score += lp_correct;
- }
- else
- {
- branch.path_score += lp_error;
- }
-
- branch.branch_score = log(1.0f - exp(iter->weight));
-
- branch.path_sequence.append(1, iter->label);
- ++branch.branch_length;
- branch.branch_cov += iter->count;
- branch.probVector.push_back(iter->weight);
-
- iter->pNode->score(s, lp_correct, lp_error, lp_missing, idx + 1, branch, out);
- }
-}
-
-// Get all the sequences into this subtrie and place them in svOut
-void SeqTrie::Node::getSequences(std::string curr, StringVector& svOut) const
-{
- // Reached a leaf, add the sequence to the vector
- if(childLinks.empty() && !curr.empty())
- svOut.push_back(curr);
-
- for(LinkList::const_iterator iter = childLinks.begin(); iter != childLinks.end(); ++iter)
- {
- std::string child_str = curr + iter->label;
- iter->pNode->getSequences(child_str, svOut);
- }
-}
-
-// insert the string s into this node starting with the symbol at idx
-// returns true if the sequence was successfully inserted
-bool SeqTrie::Node::insert(const std::string& s, double weight, size_t idx)
-{
- if(s.empty())
- return false;
-
- char b = s[idx];
- Link* pLink = getLink(b);
- if(pLink == NULL)
- {
- pLink = createChild(b);
- }
-
- pLink->increment();
- pLink->addWeight(weight);
-
- // Recurse
- if(++idx != s.size())
- return pLink->pNode->insert(s, weight, idx);
- else
- return true;
-}
-
-// remove the string s from the trie starting at pNode and character idx
-bool SeqTrie::Node::remove(const std::string& s, size_t idx)
-{
- char b = s[idx];
- Link* pLink = getLink(b);
- if(pLink != NULL)
- {
- pLink->decrement();
- if(++idx != s.size())
- return pLink->pNode->remove(s, idx);
- }
- return false;
-}
-
-
-// Remove children with count below cutoff
-void SeqTrie::Node::cullChildren(int cutoff)
-{
- LinkList::iterator iter = childLinks.begin();
- while(iter != childLinks.end())
- {
- if(childLinks.size() > 1 && iter->count < cutoff)
- {
- delete iter->pNode; // recursive
- iter = childLinks.erase(iter);
- }
- else
- {
- iter->pNode->cullChildren(cutoff);
- ++iter;
- }
- }
-}
-
-// Remodel the trie by remapping low-count children to high count branches
-void SeqTrie::Node::remodel(int cutoff, double weight)
-{
- // Split the link list into strong/weak links
- LinkList strongLinks;
- LinkList weakLinks;
-
- for(LinkList::iterator iter = childLinks.begin(); iter != childLinks.end(); ++iter)
- {
- if(iter->count < cutoff)
- weakLinks.push_back(*iter);
- else
- strongLinks.push_back(*iter);
- }
-
- // For all the weak links, if there is a unique solid link, re-map the sequence of the weak link
- // trie into the solid branch, otherwise just remove it
- bool bRemodel = strongLinks.size() == 1;
- for(LinkList::iterator iter = weakLinks.begin(); iter != weakLinks.end(); ++iter)
- {
- if(bRemodel)
- {
- // Get the sequences in the weak trie
- StringVector sv;
- iter->pNode->getSequences("", sv);
-
- // Insert the strings into the strong trie
- for(size_t i = 0; i < sv.size(); ++i)
- {
- //std::cout << "Inserting: " << sv[i] << "\n";
- Link& strong = strongLinks.front();
- strong.pNode->insert(sv[i], weight, 0);
- }
- }
-
- // Destroy the weak trie
- delete iter->pNode;
- }
-
- childLinks = strongLinks;
-
- // Recurse
- for(LinkList::iterator iter = childLinks.begin(); iter != childLinks.end(); ++iter)
- iter->pNode->remodel(cutoff, weight);
-}
-
-// Return the number of nodes in the trie rooted at this node
-size_t SeqTrie::Node::countNodes() const
-{
- size_t count = 1;
- for(LinkList::const_iterator iter = childLinks.begin(); iter != childLinks.end(); ++iter)
- count += iter->pNode->countNodes();
- return count;
-}
-
-// Recursive dot writer function
-void SeqTrie::Node::writeDot(std::ostream& out) const
-{
- out << "\"" << this << "\" [label=\"\"];\n";
- for(LinkList::const_iterator iter = childLinks.begin(); iter != childLinks.end(); ++iter)
- {
- out << "\"" << this << "\" -> \"" << iter->pNode << "\" [label=\""
- << iter->label << "," << iter->weight << "\"];\n";
- iter->pNode->writeDot(out);
- }
-}
-
-//
-// SeqTrie
-//
-
-//
-SeqTrie::SeqTrie()
-{
- m_pRoot = new Node(NULL, '^');
-}
-
-//
-SeqTrie::~SeqTrie()
-{
- delete m_pRoot; // recursively destroys children
- m_pRoot = NULL;
-}
-
-// insert the string s into the trie
-void SeqTrie::insert(const std::string& s, double weight)
-{
- if(s.empty())
- return;
- m_pRoot->insert(s, weight, 0);
-}
-
-// remove s from the trie
-void SeqTrie::remove(const std::string& s)
-{
- WARN_ONCE("SeqTrie::remove only decrementing but not reaping");
- m_pRoot->remove(s, 0);
-}
-
-// return the number of nodes in the trie
-size_t SeqTrie::countNodes() const
-{
- return m_pRoot->countNodes();
-}
-
-// remove all sub-tries that have a link less than cutoff
-void SeqTrie::cull(int cutoff)
-{
- m_pRoot->cullChildren(cutoff);
-}
-
-// remodel the trie by remapping low-probability branches
-void SeqTrie::remodel(int cutoff, double weight)
-{
- m_pRoot->remodel(cutoff, weight);
-}
-
-// score string s against the trie
-// place the result in out
-void SeqTrie::score(const std::string& s, double p_error, PathScoreVector& out)
-{
- PathScore start;
- double lp_error = log(p_error);
- double lp_correct = log(1.0f - p_error);
- double lp_missing = log(0.9);
- m_pRoot->score(s, lp_correct, lp_error, lp_missing, 0, start, out);
-}
-
-// Write the trie to a dot file
-void SeqTrie::writeDot(std::string filename)
-{
- std::ofstream writer(filename.c_str());
- writer << "digraph G\n{\n";
- m_pRoot->writeDot(writer);
- writer << "}\n";
- writer.close();
-}
View
147 src/Util/SeqTrie.h
@@ -1,147 +0,0 @@
-//-----------------------------------------------
-// Copyright 2009 Wellcome Trust Sanger Institute
-// Written by Jared Simpson (js18@sanger.ac.uk)
-// Released under the GPL
-//-----------------------------------------------
-//
-// SeqTrie.h - Sequence trie data structure
-//
-#ifndef SEQTRIE_H
-#define SEQTRIE_H
-
-#include "Util.h"
-#include "Quality.h"
-#include <algorithm>
-#include <list>
-
-struct PathScore
-{
- PathScore() : path_sequence(""), path_corrected(""),
- path_score(0.0f), branch_score(0.0f), branch_length(0), branch_cov(0), num_diff(0) {}
-
- void reverse()
- {
- std::reverse(path_sequence.begin(), path_sequence.end());
- std::reverse(path_corrected.begin(), path_corrected.end());
- std::reverse(probVector.begin(), probVector.end());
- }
-
- void print()
- {
- printf("CRT: %s PS: %lf BS: %lf AD: %lf\n", path_corrected.c_str(), path_score, branch_score,
- (double)branch_cov / (double)branch_length);
- printf("BSQ: %s\n", path_sequence.c_str());
- printf("QLT: %s\n", Quality::encodeLogProbVector(probVector).c_str());
- }
-
- // the sequence of nodes that are on this path
- std::string path_sequence;
-
- // the sequence of the input string, corrected by the path wherever it can be
- std::string path_corrected;
-
- double path_score;
- double branch_score;
- int branch_length;
- int branch_cov;
- int num_diff;
- DoubleVector probVector;
-
-};
-typedef std::vector<PathScore> PathScoreVector;
-
-
-class SeqTrie
-{
- // Internal datastructures
- class Node;
- struct Link
- {
- // functions
- Link();
- Link(Node* p, char l);
- void increment();
- void decrement();
- void addWeight(double w);
-
- // data
- Node* pNode;
- char label;
- int count;
- double weight;
- };
-
- typedef std::list<Link> LinkList;
-
- class Node
- {
- public:
- // functions
- Node(Node* pParent, char parentLabel);
- ~Node();
-
- Link* getLink(char label);
-
- bool insert(const std::string& s, double weight, size_t idx);
- bool remove(const std::string& s, size_t idx);
-
- void getSequences(std::string curr, StringVector& svOut) const;
- size_t countNodes() const;
-
- void score(const std::string& s, double lp_correct,
- double lp_error,
- double lp_missing,
- size_t idx, const PathScore& curr, PathScoreVector& out);
-
- void cullChildren(int cutoff);
- void remodel(int cutoff, double weight);
-
- void writeDot(std::ostream& out) const;
-
- private:
-
- Link* createChild(char label);
-
- //data
- Link parentLink;
- LinkList childLinks;
- };
-
- //
- public:
-
- SeqTrie();
- ~SeqTrie();
-
- void score(const std::string& s, double p_error, PathScoreVector& out);
- size_t countNodes() const;
-
- // Creation functions
- void insert(const std::string& s, double weight);
-
- // Remove the string s from the trie
- void remove(const std::string& s);
-
- // Remove all nodes that have a count less than cutoff from the tree
- void cull(int cutoff);
-
- // Find links with a count less than cutoff and re-map the branch
- void remodel(int cutoff, double weight);
-
- // I/O
- void writeDot(std::string filename);
-
- private:
-
- //
- bool insert(Node* pNode, const std::string& s, size_t idx);
- bool insertAtDepth(Node* pNode, const std::string& s, size_t depth);
- bool remove(Node* pNode, const std::string& s, size_t idx);
- void reap(Node* pNode, int cutoff);
-
- // Data
- Node* m_pRoot;
-};
-
-#endif
-
Please sign in to comment.
Something went wrong with that request. Please try again.