Permalink
Browse files

SuffixTree: insert_ukkonen: Use a hash table of suffix links.

Keep the suffix links in a local hash table instead of storing them in the Nodes. This avoids wasting space
that we only need during construction, and which we'd like to
discard after construction.
Note that the hash table (std::unordered_map) insert and lookup
are only constant time on average, and are linear in the worst case,
whereas just setting and getting from the node was very fast
constant time. So this Ukkonen implementation is no longer
strictly O(m) in the worst case.
  • Loading branch information...
1 parent 28fffb8 commit 53d3e4ccc5c6497afd7db4b02478cb3f049ec7b1 @murraycu committed Aug 19, 2016
Showing with 9 additions and 6 deletions.
  1. +9 −6 murrayc-suffix-tree/suffix_tree.h
@@ -2,6 +2,7 @@
#define MURRAYC_SUFFIX_TREE_SUFFIX_TREE_H
#include <iostream>
+#include <unordered_map>
#include <unordered_set>
#include <vector>
#include <set>
@@ -199,9 +200,6 @@ class SuffixTree {
// TODO: Wastes space on non-leaves.
// TODO: Use a set, though that would not allow duplicates.
std::unordered_set<T_Value> values_;
-
- // For Ukkonen's Suffix Tree construction algorithm.
- Node* suffix_link_ = nullptr;
};
class ActivePoint {
@@ -232,6 +230,8 @@ class SuffixTree {
auto end_ptr = std::make_shared<KeyIterator>(key_start);
KeyIterator& end = *end_ptr; //end is 1 past the end, so this is equivalent to -1 in the traditional Ukkonen implementation.
+ std::unordered_map<Node*, Node*> suffix_links;
+
// The "phases"
for (auto i = key_start; i != key_end; ++i) {
std::cout << " character: " << *i << std::endl;
@@ -280,18 +280,21 @@ class SuffixTree {
}
// Every internal node should have a suffix link:
- extra_node->suffix_link_ = &root_;
+ suffix_links[extra_node] = &root_;
// A previously-created internal node should now have its suffix link
// updated to this new internal node.
if (prev_created_internal_node) {
- prev_created_internal_node->suffix_link_ = extra_node;
+ suffix_links[prev_created_internal_node] = extra_node;
}
prev_created_internal_node = extra_node;
// Follow previous suffix link if the active node is not root:
if (active.node != &root_) {
- active.node = active.node->suffix_link_;
+ const auto iter = suffix_links.find(active.node);
+ assert(iter != suffix_links.end());
+ active.node = iter->second;
+
// Not changing active.edge or active.length.
// Note: If there are multiple constructions, then active.length
// might now be past the end of the actual edge's part.

0 comments on commit 53d3e4c

Please sign in to comment.