Permalink
Browse files

Downcase utf-8 chars of keywords

  • Loading branch information...
1 parent 77a30b9 commit c7012677f3938c6cebd2018d1ab160ad534a4824 @semaperepelitsa semaperepelitsa committed Dec 16, 2011
Showing with 6 additions and 2 deletions.
  1. +1 −1 lib/mongoid_search/util.rb
  2. +1 −1 spec/mongoid_search_spec.rb
  3. +4 −0 spec/util_spec.rb
@@ -29,10 +29,10 @@ def self.normalize_keywords(text, stem_keywords, ignore_list)
text = text.to_s.
mb_chars.
normalize(:kd).
+ downcase.
to_s.
gsub(/[._:;'"`,?|+={}()!@#%^&*<>~\$\-\\\/\[\]]/, ' '). # strip punctuation
gsub(/[^[:alnum:]\s]/,''). # strip accents
- downcase.
split(' ').
reject { |word| word.size < 2 }
text = text.reject { |word| ignore_list.include?(word) } unless ignore_list.blank?
@@ -26,7 +26,7 @@
}
it "should leave utf8 characters" do
- @product._keywords.should == ["amazing", "awesome", "ole", "Процессор", "Эльбрус", "процессоры"]
+ @product._keywords.should == ["amazing", "awesome", "ole", "процессор", "процессоры", "эльбрус"]
end
end
View
@@ -22,6 +22,10 @@
Util.normalize_keywords("CaFé", false, "").should == ["cafe"]
end
+ it "should downcase utf-8 chars of the text passed" do
+ Util.normalize_keywords("Кафе", false, "").should == ["кафе"]
+ end
+
# Pins the word-splitting behaviour: the hyphen, dot, semicolon, comma and the
# single/double quotes in the input must each act as a word boundary, every
# resulting keyword must be lowercase, and the accented "é" must come back as
# plain "e".
# NOTE(review): the description mentions "underlines", but the input string
# contains no underscore, so that case is not exercised by this example.
it "should split whitespaces, hifens, dots, underlines, etc.." do
Util.normalize_keywords("CaFé-express.com delicious;come visit, and 'win' an \"iPad\"", false, "").should == ["cafe", "express", "com", "delicious", "come", "visit", "and", "win", "an", "ipad"]
end

0 comments on commit c701267

Please sign in to comment.