Permalink
Browse files

無分岐ノード圧縮はサイズは減るけど検索速度も落ちるので元に戻す

  • Loading branch information...
sile committed Nov 24, 2011
1 parent 8efb12d commit 37584f9cae716c6f7176b3510b971bc71c2056ba
Showing with 11 additions and 35 deletions.
  1. +5 −15 src/builder.rb
  2. +6 −20 src/searcher.rb
View
@@ -165,18 +165,16 @@ class Node
attr_accessor :index
attr_accessor :base
attr_accessor :node
- attr_accessor :nobranch_child_label
def initialize(parent_base, node)
@base = 0
@node = node
@index = parent_base + node.label
- @nobranch_child_label = 0
end
def pack
- n1 = (@base & 0xFFFFFF) | (@node.label << 24)
- n2 = (@node.sibling_total << 9) | ((@node.is_terminal ? 1 : 0) << 8) | nobranch_child_label
+ n1 = (@base & 0x7FFFFFFF) | ((@node.is_terminal ? 1 : 0) << 31)
+ n2 = (@node.label & 0xFF) | (@node.sibling_total << 8)
[n1,n2].pack("N2")
end
end
@@ -191,19 +189,13 @@ def build_impl(node)
trie = node.node
children = trie.children
- if @memo.key?(trie) == false && children.size == 1 && children[0].is_terminal == false
- node.nobranch_child_label = children[0].label
- trie = children[0]
- children = trie.children
- end
-
if @memo.key?(trie)
node.base = @memo[trie]
@nodes[node.index] = node
elsif children.size==0
@nodes[node.index] = node
else
- base = @alloca.allocate(children.map{|c| c.label })
+ base = @alloca.allocate(children.map{|c| c.label})
@memo[trie] = base
node.base = base
@nodes[node.index] = node
@@ -238,7 +230,5 @@ def build (trie, output_file)
end
end
-keys = open(ARGV[0]).read.split("\n")
-dawg = Dawg::Builder.new(:show_progress => true).build(keys)
-Dawg::DA.new.build(dawg.root, "/tmp/dawg.idx")
-:done
+#keys = open(ARGV[0]).read.split("\n")
+#Dawg::Builder.new(:show_progress => true).build(keys)
View
@@ -10,17 +10,7 @@ def initialize(index_path)
# key = string or stringio
def member?(key)
index = 0
- flag = true
key.each_byte do |arc|
- nc_chck = nobranch_child_chck(index)
- #puts "#{arc}, #{nc_chck}"
- if flag && nc_chck != 0
- flag = false
- next if nc_chck==arc
- return false
- end
- flag = true
-
next_index = base(index) + arc
return false if chck(next_index) != arc
index = next_index
@@ -58,24 +48,20 @@ def each_common_prefix(key)
nil
end
- def base(index)
- @nodes[index*2] & 0xFFFFFF
+ def is_terminal(index)
+ (@nodes[index*2] >> 31) != 0
end
- def chck(index)
- @nodes[index*2] >> 24
+ def base(index)
+ @nodes[index*2] & 0x7FFFFFFF
end
- def nobranch_child_chck(index)
+ def chck(index)
@nodes[index*2+1] & 0xFF
end
- def is_terminal(index)
- (@nodes[index*2+1] & 0x100) != 0
- end
-
def sibling_total(index)
- @nodes[index*2+1] >> 9
+ @nodes[index*2+1] >> 8
end
def id_offset(index)

0 comments on commit 37584f9

Please sign in to comment.