Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
branch: master
Fetching contributors…

Octocat-spinner-32-eaf2f5

Cannot retrieve contributors at this time

file 100 lines (84 sloc) 2.739 kb
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99
task(:environment) do
  # Loads app.rb from the directory containing this Rakefile. Tasks that list
  # :environment as a prerequisite run only after this has been required.
  # NOTE(review): presumably app.rb is what defines DataMapper's setup and the
  # RfcEntry model used by the other tasks -- confirm.
  require_relative('app')
end

# Aggregate task with no action of its own: it only lists, in order, the
# schema bootstrap and the two import tasks as prerequisites.
desc %(Create the database, prepare schema, import RFC index)
task bootstrap: %w[db:bootstrap import_index import_popular]

# Schema maintenance tasks. Each one depends on :environment and delegates to
# DataMapper's auto-migration entry points.
namespace :db do
  desc %(Rebuild the db schema)
  task(:rebuild => :environment) { DataMapper.auto_migrate! }

  desc %(Automatically migrate the db schema based on changed model attributes)
  task(:migrate => :environment) { DataMapper.auto_upgrade! }

  desc %(Create and upgrade the database schema if necessary)
  task :bootstrap => :environment do
    # Storage for RfcEntry already present -> upgrade it in place;
    # otherwise run the rebuild task to create it.
    follow_up = RfcEntry.storage_exists? ? 'db:migrate' : 'db:rebuild'
    Rake::Task[follow_up].invoke
  end
end

# Reads the RFC index file and creates or refreshes one RfcEntry per
# <rfc-entry> element.
#
# Prerequisites: the 'tmp/rfc-index.xml' file task (its name is used as the
# path to parse) and :environment.
#
# For a document id not yet in the database, a new RfcEntry is filled with the
# id, title, abstract and keywords. Entries that already exist keep those
# stored fields. For every entry, new or existing, the obsoleted flag and the
# publish date are re-assigned from the index before saving.
#
# Prints the number of entries that were dirty before saving and the total
# number of RfcEntry rows.
desc %(Import the complete RFC index into the database)
task :import_index => ['tmp/rfc-index.xml', :environment] do |task|
  # Required here so the libraries are loaded only when this task runs.
  require 'nokogiri'
  require 'active_support/core_ext/object/try'
  require 'date'

  DataMapper.logger.set_log($stderr, :warn)

  # Block form of File.open closes the handle once parsing has finished.
  # The IO itself (not its contents as a String) is handed to Nokogiri().
  # NOTE(review): Nokogiri() appears to choose its parser depending on whether
  # it is given an IO or a String -- confirm before switching to File.read.
  index = File.open(task.prerequisites.first) { |io| Nokogiri(io) }
  num = 0

  # Builds a Date from a <date> node holding <year>, <month> (a name) and an
  # optional <day>. Returns nil when the node itself is missing.
  date_from_xml = ->(xml_date) {
    if xml_date
      year = xml_date.at('./year').text
      month_name = xml_date.at('./month').text
      day = xml_date.at('./day').try(:text)
      Date.parse [year, month_name, day].join(' ')
    end
  }

  index.search('rfc-entry').each do |xml_entry|
    doc_id = xml_entry.at('./doc-id').text
    entry = RfcEntry.get(doc_id)

    unless entry
      # Unknown document: populate the fields that are only set on creation.
      entry = RfcEntry.new
      entry.document_id = doc_id
      entry.title = xml_entry.at('./title').text
      entry.abstract = xml_entry.at('./abstract').try(:inner_html)
      entry.keywords = xml_entry.search('./keywords/*').map(&:text)
    end

    entry.obsoleted = xml_entry.search('./obsoleted-by').any?
    entry.publish_date = date_from_xml.(xml_entry.at('./date'))
    # Count before saving: the counter tracks entries reported dirty at this point.
    num += 1 if entry.dirty?
    entry.save
  end

  puts "updated #{num} entries (%d in database)." % RfcEntry.count
end

file 'tmp/rfc-index.xml' do |index_file|
  # Fetches the RFC index with curl into the path this file task is named
  # after, creating the tmp directory first. '-#' is passed through to curl
  # unchanged.
  source = 'ftp://ftp.rfc-editor.org/in-notes/rfc-index.xml'
  mkdir_p 'tmp'
  sh 'curl', '-#', source, '-o', index_file.name
end

# Scrapes five "popular RFC" pages and stores each listed document's position
# in the combined list as its popularity.
#
# Prerequisite: :environment.
#
# Names are the basenames (without '.html') of links under '#fmaincolumn'
# whose href starts with '/rfcs/'. They are collected page by page in document
# order, and a name's rank is its 1-based position in that combined list. Names
# with no matching RfcEntry still occupy a rank; they are reported with `warn`
# and skipped. Prints how many entries received a score.
desc %(Update the RFCs in the database with a popularity score)
task :import_popular => :environment do
  # Required here so the libraries are loaded only when this task runs.
  require 'nokogiri'
  require 'open-uri'

  pop_url = 'http://www.faqs.org/rfc-pop%d.html'

  popular = (1..5).flat_map do |n|
    # URI#open (from open-uri) instead of Kernel#open: Kernel#open no longer
    # fetches URLs on Ruby 3.0+. The block form closes the response stream
    # once the page has been parsed.
    html = URI.parse(pop_url % n).open { |io| Nokogiri(io) }
    html.search('#fmaincolumn a[href^="/rfcs/"]').map do |link|
      File.basename(link['href'], '.html')
    end
  end

  num = 0
  popular.each_with_index do |name, idx|
    entry = RfcEntry.get(name)
    if entry
      entry.popularity = idx + 1
      entry.save
      num += 1
    else
      warn "could not find #{name}"
    end
  end

  puts "applied popular score to #{num} entries."
end
Something went wrong with that request. Please try again.