Permalink
Browse files

sort RFC search results by popularity score first

  • Loading branch information...
1 parent d94e174 commit b9dcf682a443f5a646bfb09a2c723d3211835805 @mislav committed May 13, 2012
Showing with 33 additions and 1 deletion.
  1. +24 −0 Rakefile
  2. +8 −0 models.rb
  3. +1 −1 searchable.rb
View
@@ -56,3 +56,27 @@ file 'tmp/rfc-index.xml' do |task|
index_url = 'ftp://ftp.rfc-editor.org/in-notes/rfc-index.xml'
sh 'curl', '-#', index_url, '-o', task.name
end
+
# Imports popularity rankings for RFC entries from the faqs.org
# "popular RFCs" listing pages.
#
# Listing pages 1 through 5 are fetched in order and every link whose href
# starts with "/rfcs/" inside #fmaincolumn is collected. The 1-based position
# of a link in that combined list is stored as the entry's popularity, so a
# smaller number means a more popular document. Names for which
# RfcEntry.get_rfc finds no entry are reported with `warn` and skipped.
task :import_popular => :environment do
  require 'nokogiri'
  require 'open-uri'

  pop_url = 'http://www.faqs.org/rfc-pop%d.html'

  popular = (1..5).flat_map do |page|
    # URI.open is needed on Ruby 3.0+, where Kernel#open no longer accepts
    # URLs; the block form closes the handle once the page has been parsed.
    html = URI.open(pop_url % page) { |io| Nokogiri::HTML(io) }
    html.search('#fmaincolumn a[href^="/rfcs/"]').map do |link|
      File.basename(link['href'], '.html')
    end
  end

  popular.each.with_index(1) do |name, rank|
    entry = RfcEntry.get_rfc(name)
    if entry
      entry.popularity = rank
      entry.save!
    else
      warn "could not find #{name}"
    end
  end
end
View
@@ -12,6 +12,7 @@ class RfcEntry
property :body, Text
property :obsoleted, Boolean, default: false
property :publish_date, Date
+ property :popularity, Integer
def keywords=(value)
if Array === value
@@ -23,5 +24,12 @@ def keywords=(value)
searchable title: 'A', keywords: 'B',
abstract: 'C', body: 'D'
+
# Looks up an entry by a loosely formatted document identifier.
#
# Every non-alphanumeric character is stripped first, so "RFC 791",
# "rfc-791" and 791 all normalize to the key "RFC0791". A leading run of
# letters, when present, is upcased and used as the key prefix; an
# identifier without letters gets the prefix "RFC". The number is
# zero-padded to at least four digits.
#
# num - the identifier; anything responding to `to_s` (String, Integer, ...).
#
# Returns the result of `get` for the normalized key, or nil when the
# identifier is not an optional letter prefix followed by digits.
def self.get_rfc(num)
  cleaned = num.to_s.gsub(/[^a-z0-9]+/i, '')
  match = /\A([a-z]*)(\d+)\z/i.match(cleaned)
  return unless match

  type = match[1].upcase
  type = 'RFC' if type.empty?
  # Base 10 is explicit: without it a zero-padded number such as "0010"
  # is parsed as octal, and "0791" raises because 9 is not an octal digit.
  get format('%s%04d', type, Integer(match[2], 10))
end
end
View
@@ -43,7 +43,7 @@ def search_raw query, options = {}
ts_rank_cd(search_vector, query) AS search_rank
FROM #{storage_name}, plainto_tsquery('english', ?) query
WHERE search_vector @@ query
- ORDER BY search_rank DESC
+ ORDER BY popularity, search_rank DESC
LIMIT ? OFFSET ?
SQL
end

0 comments on commit b9dcf68

Please sign in to comment.