Skip to content

Commit

Permalink
A small bit of refactoring, and more tests.
Browse files Browse the repository at this point in the history
  • Loading branch information
Kevin Weil committed Nov 25, 2011
1 parent d9ad670 commit 24d53ca
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 29 deletions.
58 changes: 29 additions & 29 deletions lib/fuzzy_text_matcher.rb
Expand Up @@ -42,6 +42,8 @@ def initialize(dictionary)
# * :score refers to a value between 0 and 1 indicating how closely the file matches the given pattern.
# A score of 1 means the pattern matches the file exactly.
def search(pattern, &block)
return if (pattern.nil? || pattern.empty?)

regex = make_fuzzy_search_regex(pattern)

dictionary.each do |entry|
Expand All @@ -68,8 +70,8 @@ def inspect #:nodoc:
private

# Takes the given pattern string "foo" and converts it to a new string
# "(f)([^/]*?)(o)([^/]*?)(o)"
# that can be used to create a regular expression.
# "^(.*?)(f)([^/]*?)(o)([^/]*?)(o)(.*)$"
# before returning the corresponding case-insensitive regular expression.
def make_fuzzy_search_regex(pattern)
pattern_chars = pattern.split(//)
pattern_chars << "" if pattern.empty?
Expand All @@ -83,23 +85,35 @@ def make_fuzzy_search_regex(pattern)
Regexp.new(regex_raw, Regexp::IGNORECASE)
end

# TODO document
def calculate_score(entry, runs)


# Determine the score of this match.
# 1. fewer "inside runs" (runs corresponding to the original pattern)
# is better.
# 2. better coverage of the actual path name is better
# Try the regex against a single entry and, on success, hand the match
# metadata to the caller's block.
#
# entry - the String to test.
# regex - the Regexp to match it against.
#
# Yields a Hash with :name (the entry itself), :highlighted_name (the runs
# joined into one string), :runs and :score, the latter two taken from
# build_match_result.
#
# Returns the block's value when the entry matches, nil otherwise.
def match_entry(entry, regex, &block)
  found = entry.match(regex)
  return unless found

  details = build_match_result(found)
  matched_runs = details[:runs]

  payload = {
    :name => entry,
    :highlighted_name => matched_runs.join,
    :runs => matched_runs,
    :score => details[:score]
  }
  yield payload
end

# Determine the score of this match, a Float between 0 and 1.
# 1. Fewer "inside runs" (runs corresponding to the original pattern) is better.
# 2. Better coverage of the actual word is better.
#
# entry - the matched string; kept for interface compatibility, not used in
#         the computation.
# runs  - a list of objects responding to #inside (truthy for runs that came
#         from the pattern) and #string (the text of the run).
#
# Returns the product of the run ratio and the character ratio.
def calculate_score(entry, runs)
  inside_runs = runs.select { |r| r.inside }

  # inject(0) rather than reduce(:+) so an empty list sums to 0 instead of nil.
  inside_chars = inside_runs.inject(0) { |sum, run| sum + run.string.length }
  total_chars = runs.inject(0) { |sum, run| sum + run.string.length }

  # Item 1 above, fewer runs (i.e. more contiguous matches) is better.
  # Guarded so that zero inside runs cannot produce Infinity (and then NaN).
  run_ratio = inside_runs.empty? ? 1.0 : 1.0 / inside_runs.length

  # Item 2 above, a higher ratio of characters in the search to characters in
  # the term is better. Guarded against a zero-length total.
  char_ratio = total_chars.zero? ? 1.0 : inside_chars.to_f / total_chars

  # Score is the product of these ratios.
  run_ratio * char_ratio
end


Expand All @@ -123,18 +137,4 @@ def build_match_result(match)
score = calculate_score(match.string, runs)
return { :score => score, :runs => runs }
end

# Try the regex against a single entry and, on success, hand the match
# metadata to the caller's block.
#
# entry - the String to test.
# regex - the Regexp to match it against.
#
# Yields a Hash with :name (the entry itself), :highlighted_name (the runs
# joined into one string), :runs and :score, the latter two taken from
# build_match_result.
#
# Returns the block's value when the entry matches, nil otherwise.
def match_entry(entry, regex, &block)
  found = entry.match(regex)
  return unless found

  details = build_match_result(found)
  matched_runs = details[:runs]

  payload = {
    :name => entry,
    :highlighted_name => matched_runs.join,
    :runs => matched_runs,
    :score => details[:score]
  }
  yield payload
end
end
35 changes: 35 additions & 0 deletions spec/fuzzy_text_matcher_spec.rb
Expand Up @@ -9,4 +9,39 @@
it "finds two countries for US" do
@matcher.find("US").should have(2).items
end

it "is case-insensitive" do
@matcher.find("US").should == @matcher.find("us")
end

it "finds the two countries with Z" do
countries_with_z = @matcher.find("z").collect { |m| m[:name] }.compact
countries_with_z.should == ["Brazil", "Belize"]
end

# "at" appears contiguously in "United States" but only as separated letters
# in "Argentina", so the former is expected to score higher.
it "should rank consecutive runs higher than separated characters" do
  countries_with_at = @matcher.find("at")
  usa = countries_with_at.find { |m| m[:name] == "United States" }
  argentina = countries_with_at.find { |m| m[:name] == "Argentina" }
  usa[:score].should be > argentina[:score]
end

it "should return no results when given an empty or nil argument" do
@matcher.find("").should be_empty
@matcher.find(nil).should be_empty
end

it "should return a 1.0 score for an exact match" do
results = @matcher.find("United States")
results.should have(1).items
results[0][:score].should == 1.0
end

it "should construct the regex properly" do
# Hack to test a private method.
def @matcher.make_fuzzy_search_regex_public(*args)
make_fuzzy_search_regex(*args)
end
@matcher.make_fuzzy_search_regex_public("foo").source.should == "^(.*?)(f)([^/]*?)(o)([^/]*?)(o)(.*)$"
end
end

0 comments on commit 24d53ca

Please sign in to comment.