Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Newer
Older
100644 83 lines (69 sloc) 2.075 kb
f96f03b @mislav add basic search functionality
authored
# Prerequisite task for everything that needs the application loaded.
# Activates the Bundler-managed load path and then loads the `app` file that
# sits next to this Rakefile.
# NOTE(review): `app` presumably defines DataMapper and the RfcEntry model
# used by the tasks below -- confirm against app.rb.
task :environment do
  require 'bundler'
  Bundler.setup

  require_relative 'app'
end
6
# Schema maintenance tasks. Both depend on :environment so the app is loaded
# before DataMapper is called.
namespace :db do
  # Runs DataMapper.auto_migrate!
  # NOTE(review): DataMapper documents auto_migrate! as destructive (tables
  # are recreated) -- confirm before running against data you care about.
  task rebuild: :environment do
    DataMapper.auto_migrate!
  end

  # Runs DataMapper.auto_upgrade!
  task migrate: :environment do
    DataMapper.auto_upgrade!
  end
end
16
# Imports RFC metadata from the downloaded index file into RfcEntry records.
#
# Prerequisites: the index file (its path is the first prerequisite and is
# what gets parsed) and :environment.
#
# For each `rfc-entry` element:
# * an entry not yet stored is created with its document id, title, abstract
#   and keywords taken from the index;
# * every entry, new or existing, has `obsoleted` and `publish_date` refreshed.
#
# Prints the number of entries that were dirty just before being saved.
task :import_index => ['tmp/rfc-index.xml', :environment] do |task|
  # Required lazily so that unrelated tasks do not pay for loading them.
  require 'nokogiri'
  require 'active_support/core_ext/object/try'
  require 'date'

  DataMapper.logger.set_log($stderr, :warn)

  # The block form of File.open closes the handle as soon as parsing is done;
  # previously the handle stayed open until garbage collection. The same
  # `Nokogiri(io)` entry point is kept, so the parser choice is unchanged.
  index = File.open(task.prerequisites.first) { |io| Nokogiri(io) }
  num = 0

  # Builds a Date from a date node holding year/month and an optional day
  # child. Returns nil when the node itself is missing.
  date_from_xml = ->(xml_date) {
    if xml_date
      year = xml_date.at('./year').text
      month_name = xml_date.at('./month').text
      # The day child may be absent; `try` yields nil in that case and the
      # joined string is then parsed from year and month name only.
      day = xml_date.at('./day').try(:text)
      Date.parse [year, month_name, day].join(' ')
    end
  }

  index.search('rfc-entry').each do |xml_entry|
    doc_id = xml_entry.at('./doc-id').text

    entry = RfcEntry.get(doc_id)
    unless entry
      # Static attributes are only written when the entry is first created.
      entry = RfcEntry.new
      entry.document_id = doc_id
      entry.title = xml_entry.at('./title').text
      entry.abstract = xml_entry.at('./abstract').try(:inner_html)
      entry.keywords = xml_entry.search('./keywords/*').map(&:text)
    end

    entry.obsoleted = xml_entry.search('./obsoleted-by').any?
    entry.publish_date = date_from_xml.(xml_entry.at('./date'))

    # Count before saving: the dirty state is checked prior to save!.
    num += 1 if entry.dirty?
    entry.save!
  end

  puts "updated #{num} entries."
end
53
# File task that produces tmp/rfc-index.xml by downloading the RFC index with
# curl. The output path is the task's own name.
file 'tmp/rfc-index.xml' do |download|
  source = 'ftp://ftp.rfc-editor.org/in-notes/rfc-index.xml'
  destination = download.name

  mkdir_p 'tmp'
  sh 'curl', '-#', source, '-o', destination
end
b9dcf68 @mislav sort RFC search results by popularity score first
authored
59
# Assigns a popularity rank to RfcEntry records based on the faqs.org
# "popular RFCs" listing pages 1 through 5.
#
# Names are collected in page order, then in link order within each page, and
# the 1-based position in that combined list is stored as `popularity`. Names
# with no matching entry are reported via `warn` and skipped.
task :import_popular => :environment do
  # Required lazily so that unrelated tasks do not pay for loading them.
  require 'nokogiri'
  require 'open-uri'

  pop_url = 'http://www.faqs.org/rfc-pop%d.html'

  popular = (1..5).flat_map do |n|
    # Open via URI#open (provided by open-uri) rather than a bare `open`:
    # Kernel#open no longer handles URLs on Ruby 3.0+, while this form works
    # on old and new Rubies alike. The block form also closes the response
    # stream once the document has been parsed.
    html = URI.parse(pop_url % n).open { |io| Nokogiri(io) }

    html.search('#fmaincolumn a[href^="/rfcs/"]').map do |link|
      # The name is the link target's file name without the .html extension.
      File.basename(link['href'], '.html')
    end
  end

  popular.each_with_index do |name, idx|
    entry = RfcEntry.get_rfc(name)

    if entry
      entry.popularity = idx + 1
      entry.save!
    else
      warn "could not find #{name}"
    end
  end
end
Something went wrong with that request. Please try again.