Permalink
Find file
Fetching contributors…
Cannot retrieve contributors at this time
146 lines (119 sloc) 4.38 KB
# Mercury tests, as well the quicksilver and liquidmetal algorithms which we
# sometimes use for comparison
require 'mercury'
# Fuzzy matching algorithm from Quicksilver, the OS X tool
def qs_score(entry, abbrev)
return 0.9 if abbrev.length == 0
return 0.0 if abbrev.length > entry.length
abbrev.length.downto(1) do |i|
sub_abbrev = abbrev[0...i]
index = entry.index(sub_abbrev)
next if index.nil?
next if index + sub_abbrev.length > entry.length
next_entry = entry[index+sub_abbrev.length..-1]
next_abbrev = i >= abbrev.length ? "" : abbrev[i..-1]
remaining_score = qs_score(next_entry, next_abbrev)
if remaining_score > 0
score = entry.length - next_entry.length
if index != 0
c = entry[index - 1]
word_boundaries = " \t/._"
if word_boundaries.include?(c)
for j in 0...(index-1)
c = entry[j]
score -= word_boundaries.include?(c) ? 1 : 0.15
end
else
score -= index
end
end
score += remaining_score * next_entry.length
score /= entry.length
return score
end
end
return 0.0
end
# Port of Ryan McGeary's LiquidMetal fuzzy matching algorithm found at:
# http://github.com/rmm5t/liquidmetal/tree/master.
class LiquidMetal
@@SCORE_NO_MATCH = 0.0
@@SCORE_MATCH = 1.0
@@SCORE_TRAILING = 0.8
@@SCORE_TRAILING_BUT_STARTED = 0.90
@@SCORE_BUFFER = 0.85
def self.score(string, abbrev)
return @@SCORE_TRAILING if abbrev.empty?
return @@SCORE_NO_MATCH if abbrev.length > string.length
scores = buildScoreArray(string, abbrev)
sum = scores.inject { |a, b| a + b }
return sum / scores.length;
end
def self.buildScoreArray(string, abbrev)
scores = Array.new(string.length)
lower = string.downcase()
lastIndex = -1
started = false
abbrev.downcase().each_byte do |c|
index = lower.index(c, lastIndex + 1)
return scores.fill(@@SCORE_NO_MATCH, 0..-1) if index.nil?
started = true if index == 0
if index > 0 and " \t/._-".include?(string[index - 1])
scores[index - 1] = @@SCORE_MATCH
scores.fill(@@SCORE_BUFFER, (lastIndex + 1)...(index - 1))
elsif string[index] >= "A"[0] and string[index] <= "Z"[0]
scores.fill(@@SCORE_BUFFER, (lastIndex + 1)...index)
else
scores.fill(@@SCORE_NO_MATCH, (lastIndex + 1)...index)
end
scores[index] = @@SCORE_MATCH
lastIndex = index
end
trailing_score = started ? @@SCORE_TRAILING_BUT_STARTED : @@SCORE_TRAILING
scores.fill(trailing_score, lastIndex + 1)
return scores
end
end
def print_score(entry, abbrev)
old_score = LiquidMetal.score(entry, abbrev)
new_score = Mercury.score(entry, abbrev)
puts "%-25s %-10s %.4f %.4f" % [entry, abbrev, old_score, new_score]
end
def test(high_entry, low_entry, abbrev)
high = Mercury.score(high_entry, abbrev)
low = Mercury.score(low_entry, abbrev)
puts "%-8s %-10s %-25s %.4f %-25s %.4f" % [high > low ? "PASS" : "FAIL", abbrev,
high_entry, high, low_entry, low]
end
print_score("phone numbers.txt", "phnb")
print_score("phone numbers.txt", "pht")
print_score("poohead.txt", "pht")
print_score("poohead.txt", "poo")
print_score("poohead.txt", "pood")
print_score("foo.lua", "lua")
print_score("lua-expression.h", "lua")
print_score("query-dispatcher.h", "qudisp")
print_score("query-dispatcher.cc", "qudisp")
print_score("query-dispatcher.h", "qudisph")
print_score("query-dispatcher.cc", "qudisph")
print_score("query-dispatczer.h", "qudisph")
print_score("query-dispatczer.h", "qudisp")
print_score("aaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaa")
print_score("query-executor.h", "exec")
print_score("query-executor.cc", "exec")
print_score("tablet-executor-node.h", "ten")
print_score("materialize-executor-node.h", "ten")
test("protocoltype.go", "serverstatus.proto", "proto")
test("protocoltype.go", "test.proto", "proto")
test("protocoltype.go", "test.proto", "pr")
test("protocoltype.go", "test.proto", "p")
test("protocoltype.go", "test.proto", "poto")
test("query-dispatcher.h", "query-dispatcher.cc", "qudisph")
test("query-plan.h", "query-dispatcher.h", "qup")
test("phone numbers.txt", "poohead.txt", "pht")
test("tokyo.txt", "abcde.txt", "t")
test("tablet-executor-node.h", "materialize-executor-node.h", "ten")
test("tent.txt", "tablet-executor-node.h", "ten")
test("tent.txt", "tablet-executor-node.h", "te")
test("ad", "m-a", "a")
test("a", "b", "A")