Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

implement automatic fetching & rendering of RFCs

  • Loading branch information...
commit 9b476ad6a7718bdae4d3825f15b274d1c91b8d67 1 parent b3de82f
@mislav authored
View
1  Gemfile
@@ -20,3 +20,4 @@ gem 'erubis'
gem 'dm-postgres-adapter'
gem 'dm-migrations'
+gem 'dm-timestamps'
View
3  Gemfile.lock
@@ -22,6 +22,8 @@ GEM
dm-postgres-adapter (1.2.0)
dm-do-adapter (~> 1.2.0)
do_postgres (~> 0.10.6)
+ dm-timestamps (1.2.0)
+ dm-core (~> 1.2.0)
do_postgres (0.10.8)
data_objects (= 0.10.8)
erubis (2.7.0)
@@ -61,6 +63,7 @@ DEPENDENCIES
coffee-script
dm-migrations
dm-postgres-adapter
+ dm-timestamps
erubis
i18n
nokogiri
View
1  Procfile
@@ -1 +1,2 @@
web: bundle exec rackup config.ru -s thin -p $PORT
+console: irb -r./app --simple-prompt
View
13 Rakefile
@@ -19,7 +19,7 @@ task :import_index => ['tmp/rfc-index.xml', :environment] do |task|
require 'active_support/core_ext/object/try'
require 'date'
- DataMapper.logger.set_log($stderr, :warn)
+ DataMapper.logger.set_log($stderr, :debug)
index = Nokogiri File.open(task.prerequisites.first)
num = 0
@@ -45,10 +45,10 @@ task :import_index => ['tmp/rfc-index.xml', :environment] do |task|
entry.obsoleted = xml_entry.search('./obsoleted-by').any?
entry.publish_date = date_from_xml.(xml_entry.at('./date'))
num += 1 if entry.dirty?
- entry.save!
+ entry.save
end
- puts "updated #{num} entries."
+ puts "updated #{num} entries (%d in database)." % RfcEntry.count
end
file 'tmp/rfc-index.xml' do |task|
@@ -63,6 +63,7 @@ task :import_popular => :environment do
popular = []
pop_url = 'http://www.faqs.org/rfc-pop%d.html'
+ num = 0
(1..5).each do |n|
html = Nokogiri open(pop_url % n)
@@ -72,11 +73,13 @@ task :import_popular => :environment do
end
popular.each_with_index do |name, idx|
- if entry = RfcEntry.get_rfc(name)
+ if entry = RfcEntry.get(name)
entry.popularity = idx + 1
- entry.save!
+ entry.save
+ num += 1
else
warn "could not find #{name}"
end
end
+ puts "applied popular score to #{num} entries."
end
View
33 app.rb
@@ -1,7 +1,6 @@
# encoding: utf-8
require 'sinatra'
require_relative 'lib/sinatra_boilerplate'
-require_relative 'rfc'
set :sass do
options = {
@@ -39,10 +38,15 @@
require_relative 'models'
+configure do # application setup: finalize models, make failed saves raise, choose the XML download directory
+ DataMapper.finalize
+ DataMapper::Model.raise_on_save_failure = true # NOTE(review): presumably makes save raise instead of returning false; confirm against DataMapper docs
+ RfcFetcher.download_dir = File.expand_path('../tmp/xml', __FILE__) # tmp/xml resolved relative to this file
+end
+
helpers do
- def display_document_id doc_id
- doc_id = doc_id.document_id if doc_id.respond_to? :document_id
- doc_id.sub(/(\d+)/, ' \1')
+ def display_document_id doc # inserts a space before the first run of digits in doc.id, e.g. "RFC2616" -> "RFC 2616"
+ doc.id.sub(/(\d+)/, ' \1')
end
def display_abstract text
@@ -54,8 +58,8 @@ def search_path options = {}
url '/search?' + Rack::Utils.build_query(get_params), false
end
- def rfc_path doc_id
- doc_id = doc_id.document_id if doc_id.respond_to? :document_id
+ def rfc_path doc # accepts either a document id String or an object that responds to id
+ doc_id = String === doc ? doc : doc.id
url doc_id, false
end
@@ -83,15 +87,20 @@ def page_title title = nil
end
get "/search" do
+ expires 5 * 60, :public
@query = params[:q]
@limit = 50
- @results = RfcEntry.search_raw @query, page: params[:page], limit: @limit
+ @results = RfcDocument.search @query, page: params[:page], limit: @limit
erb :search
end
-get "/oauth" do
- expires 3600, :public
- doc = RFC::Document.new File.open('draft-ietf-oauth-v2-25.xml')
- html = RFC::TemplateHelpers.render doc
- render :str, html, {layout_engine: :erb}, title: "OAuth 2.0"
+get "/:doc_id" do # shows one document; the stored body is rendered on first view when an XML source is found
+ @rfc = RfcDocument.fetch(params[:doc_id]) { halt 404 } # the block runs when no entry matches the id
+ redirect to(@rfc.id) unless request.path == "/#{@rfc.id}" # any other spelling of the id is redirected to the canonical path
+
+ cache_control :public
+ last_modified @rfc.last_modified
+
+ @rfc.make_pretty ->(xref) { rfc_path(xref) if xref =~ /^RFC\d+$/ } # resolver returns a path only for targets of the form RFC<digits>, nil otherwise
+ erb :show
end
View
3,672 draft-ietf-oauth-v2-25.xml
0 additions, 3,672 deletions not shown
View
181 models.rb
@@ -1,18 +1,92 @@
+require_relative 'rfc'
+
+class RfcDocument # wraps an RfcEntry record; readers are delegated to the entry, fetching/rendering is added here
+ extend Forwardable
+
+ attr_reader :entry
+ def_delegators :entry, :title, :abstract, :body, :publish_date
+
+ def_delegator :entry, :document_id, :id # id returns the entry's document_id
+ def_delegator :entry, :obsoleted, :obsoleted?
+ def_delegator :entry, :updated_at, :last_modified
+
+ class << self
+ alias_method :wrap, :new # wrap(entry) is another name for new(entry)
+
+ def search query, options = {} # wraps each result of RfcEntry.search_raw in an RfcDocument
+ RfcEntry.search_raw(query, options).map {|e| wrap e }
+ end
+
+ def fetch doc_id # returns the wrapped entry, or the value of the given block when no entry is found
+ entry = RfcEntry.get doc_id
+ entry ? wrap(entry) : yield
+ end
+ end
+
+ def initialize entry
+ @entry = entry
+ end
+
+ def external_url # datatracker.ietf.org page for the lowercased id
+ "http://datatracker.ietf.org/doc/#{id.downcase}/"
+ end
+
+ def pretty? # true when the entry has a non-nil body
+ !entry.body.nil?
+ end
+
+ def make_pretty href_resolver # fetches and renders the body once; does nothing when fetcher_version is already set
+ if entry.fetcher_version.nil?
+ fetcher = RfcFetcher.new self.id
+ entry.xml_source = fetcher.xml_url
+ entry.fetcher_version = fetcher.version # recorded even when no XML source was found, so the lookup is not repeated
+
+ if fetcher.fetchable?
+ fetcher.fetch
+ doc = File.open(fetcher.path) {|file| RFC::Document.new file }
+ doc.href_resolver = href_resolver # used by the document to build hrefs for xref targets
+ entry.body = RFC::TemplateHelpers.render doc
+ end
+ entry.save
+ end
+ end
+end
+
require 'dm-migrations'
+require 'dm-timestamps'
require_relative 'searchable'
class RfcEntry
include DataMapper::Resource
extend Searchable
- property :document_id, String, length: 10, key: true
- property :title, String, length: 255
- property :abstract, Text, length: 2200
- property :keywords, Text, length: 500
- property :body, Text
- property :obsoleted, Boolean, default: false
- property :publish_date, Date
- property :popularity, Integer
+ property :document_id, String, length: 10, key: true
+ property :title, String, length: 255
+ property :abstract, Text, length: 2200
+ property :keywords, Text, length: 500
+ property :body, Text
+ property :obsoleted, Boolean, default: false
+ property :publish_date, Date
+ property :popularity, Integer
+ property :xml_source, String, length: 100
+ property :fetcher_version, Integer
+
+ timestamps :updated_at
+
+ class << self
+ def get doc_id # looks the record up by the normalized form of doc_id
+ super normalize_document_id(doc_id)
+ end
+
+ private
+
+ def normalize_document_id doc_id # e.g. "rfc 42" -> "RFC0042"; the type defaults to RFC when no letters are given
+ doc_id.to_s.gsub(/[^a-z0-9]+/i, '') =~ /^([a-z]*)(\d+)$/i # drop non-alphanumerics, then capture leading letters and trailing digits
+ type, num = $1.to_s.upcase, $2.to_i # when nothing matched both captures are nil, giving "" and 0
+ type = 'RFC' if type.empty?
+ "#{type}%04d" % num
+ end
+ end
def keywords=(value)
if Array === value
@@ -24,12 +98,91 @@ def keywords=(value)
searchable title: 'A', keywords: 'B',
abstract: 'C', body: 'D'
+end
+
+require 'fileutils'
+require 'net/http'
+require 'nokogiri'
- def self.get_rfc num
- num.to_s.gsub(/[^a-z0-9]+/i, '') =~ /^([a-z]*)(\d+)$/i
- type, num = $1.to_s.upcase, Integer($2)
- type = 'RFC' if type.empty?
- get "#{type}%04d" % num
+class RfcFetcher # locates and downloads the XML source for a document id
+ XML_URL = 'http://xml.resource.org/public/rfc/xml/%s.xml' # %s is filled with the lowercased document id
+ DRAFTS_URL = 'http://www.ietf.org/id/'
+ TRACKER_URL = 'http://datatracker.ietf.org/doc/%s/' # %s is filled with the lowercased document id
+
+ class << self
+ attr_accessor :download_dir
+
+ def version() 1 end # RfcDocument#make_pretty stores this value as the entry's fetcher_version
+ end
+ self.download_dir = File.join(ENV['TMPDIR'] || '/tmp', 'rfc-xml') # default location; callers may assign another directory
+
+ attr_reader :path
+
+ def initialize doc_id # the id is kept as a lowercased string
+ @doc_id = doc_id.to_s.downcase
+ end
+
+ def version() self.class.version end
+
+ def xml_url # memoized with defined?, so a nil result is not looked up again
+ return @xml_url if defined? @xml_url
+ @xml_url = find_xml
+ end
+
+ def fetchable? # true when an XML URL was found
+ !xml_url.nil?
+ end
+
+ def fetch # sets path and downloads the XML there unless the file already exists
+ @path = File.join self.class.download_dir, @doc_id + '.xml'
+ unless File.exist? @path
+ FileUtils.mkdir_p File.dirname(@path)
+ system 'curl', '--silent', xml_url.to_s, '-o', @path # the result of system is not checked here
+ end
+ end
+
+ def request url # yields the open connection and the request URI; returns what the block returns
+ url = URI(url)
+ res = Net::HTTP.start(url.host, url.port) {|http| yield http, url.request_uri }
+ res.error! if Net::HTTPServerError === res # raise on server-error responses
+ res
+ end
+
+ def http_exist? url # true only when a HEAD request is answered with Net::HTTPOK
+ Net::HTTPOK === request(url) {|http, path| http.head path }
+ end
+
+ def find_xml # URL of the XML source; may be nil when the fallbacks find nothing
+ xml_url = XML_URL % @doc_id
+ if @doc_id.start_with? 'rfc' and http_exist? xml_url # XML_URL is only probed for ids starting with "rfc"
+ xml_url
+ else
+ find_tracker_xml
+ end
+ end
-end
+ def get_html url # yields the parsed response body when the reply is Net::HTTPOK; otherwise returns nil
+ res = request(url) {|http, path| http.get path }
+ yield Nokogiri(res.body) if Net::HTTPOK === res
+ end
+
+ def find_tracker_xml # reads the tracker page: prefers its "xml" link, else follows a "Was draft-..." name to the drafts listing
+ get_html TRACKER_URL % @doc_id do |html|
+ if href = html.at('//table[@id="metatable"]//a[text()="xml"]/@href')
+ href.text
+ elsif html.search('#metatable td:nth-child(2)').text =~ /^Was (draft-[\w-]+)/
+ find_draft_xml $1
+ end
+ end
+ end
+
+ def find_draft_xml draft_name # last link, in string sort order, whose basename without ".xml" is draft_name or draft_name-<digits>
+ drafts_url = URI(DRAFTS_URL)
+ get_html drafts_url do |html|
+ html.search("a[href*=#{draft_name}]").
+ map {|link| (drafts_url + link['href']).to_s }.
+ select {|href| File.basename(href, '.xml') =~ /^#{draft_name}(-\d+)?$/ }.
+ sort.last
+ end
+ end
+end
View
118 rfc.rb
@@ -1,8 +1,10 @@
require 'nokogiri'
require 'delegate'
+require 'forwardable'
require 'active_support/memoizable'
require 'active_support/core_ext/object/blank'
require 'active_support/core_ext/string/inflections'
+require 'active_support/core_ext/array/grouping'
require 'erubis'
module RFC
@@ -29,6 +31,16 @@ def wrap(node, klass, *args)
element
end
+ IGNORED_ELEMENTS = %w[iref cref]
+
+ def element_names # node names of the child elements, with IGNORED_ELEMENTS removed
+ element_children.map(&:node_name) - IGNORED_ELEMENTS
+ end
+
+ def text_children # child nodes for which text? is true
+ children.select(&:text?)
+ end
+
def template_name
self.class.name.demodulize.underscore
end
@@ -120,11 +132,22 @@ def id
end
def elements
- element_children.map do |node|
+ element_children.each_with_object([]) do |node, all|
case node.name
- when 'section' then wrap(node, Section, self)
- when 'figure' then wrap(node, Figure)
- when 't' then wrap(node, Text)
+ when 'section' then all << wrap(node, Section, self)
+ when 'figure' then all << wrap(node, Figure)
+ when 'texttable' then all << wrap(node, Table)
+ when 't'
+ text = wrap(node, Text)
+ in_definition_list = all.last.is_a? DefinitionList
+ if text.definition? in_definition_list
+ all << DefinitionList.new(document) unless in_definition_list
+ all.last.add_element text
+ else
+ all << text
+ end
+ when 'iref', 'cref'
+ # ignore
else
raise "unrecognized section-level node: #{node.name}"
end
@@ -146,11 +169,7 @@ def target
end
def href
- if (target =~ /^[\w-]+:/) == 0
- target
- else
- '#' + target.parameterize
- end
+ document.href_for(target)
end
end
@@ -158,7 +177,7 @@ class Span < NodeWrapper
end
class Text < NodeWrapper
- def blocks
+ def elements
children.each_with_object([[]]) do |node, all|
if node.element?
case node.name
@@ -170,7 +189,9 @@ def blocks
all.last << wrap(node, Xref)
when 'spanx'
all.last << wrap(node, Span)
- when 'iref'
+ when 'figure'
+ all.last << wrap(node, Figure)
+ when 'iref', 'cref'
# ignore
else
$stderr.puts node.inspect if $-d
@@ -183,13 +204,33 @@ def blocks
end
def list?
- element_children.map(&:node_name) == %w[list] and
- children.select(&:text?).all?(&:blank?)
+ element_names == %w[list] and text_children.all?(&:blank?)
end
def list
wrap('./list', List)
end
+
+ # The element is a definition list item when it contains exactly one
+ # non-blank, single-line text node (the definition title) and a list whose
+ # only element is a single <t> item (the definition description).
+ # However, if this element follows another definition item, then the inner
+ # list can have multiple items.
+ def definition? following_another = false
+ element_names == %w[list] and title = definition_title and # title is never read again; only its truthiness matters
+ following_another || list.element_names == %w[t]
+ end
+
+ def definition_title # the only non-blank text child, or nil when there is none, several, or it spans lines
+ nodes = text_children.select {|t| !t.blank? }
+ if nodes.size == 1 and !nodes.first.text.strip.include?("\n") # a title must fit on one line once stripped
+ nodes.first
+ end
+ end
+
+ def definition_description # every <t> directly under the inner list, each wrapped as Text
+ search('./list/t').map {|t| wrap(t, Text) }
+ end
end
class List < NodeWrapper
@@ -208,6 +249,21 @@ def style
type = 'alpha' if type == 'format (%C)'
type
end
+
+ def note? # truthy when the first child element's text starts, after optional whitespace, with "Note:" plus a whitespace character
+ first_element_child.text =~ /\A\s*Note:\s/
+ end
+ end
+
+ class DefinitionList < Struct.new(:document, :elements) # holds a document plus the items appended through add_element
+ extend Forwardable
+ def_delegator :elements, :<<, :add_element # add_element(item) appends item to elements
+
+ def initialize(doc, els = []) # elements defaults to a new empty array
+ super(doc, els)
+ end
+
+ def template_name() 'definition_list' end # NOTE(review): presumably selects templates/definition_list.erb; confirm in the renderer
end
class Figure < NodeWrapper
@@ -251,6 +307,24 @@ def unindent(text)
end
end
+ class Table < NodeWrapper # wrapper for a texttable node (Section#elements wraps 'texttable' nodes with this class)
+ def columns # the ttcol children
+ search('./ttcol')
+ end
+
+ def rows # the c children wrapped as Text, grouped into rows of columns.size cells
+ search('./c').map {|c| wrap(c, Text) }.in_groups_of(columns.size, false) # NOTE(review): false presumably means a short last row is not padded; confirm in ActiveSupport docs
+ end
+
+ def preamble # the block appends 'preamble' to the wrapper's classnames
+ wrap('./preamble', Text) { |t| t.classnames << 'preamble' }
+ end
+
+ def postamble # the block appends 'postamble' to the wrapper's classnames
+ wrap('./postamble', Text) { |t| t.classnames << 'postamble' }
+ end
+ end
+
class Reference < NodeWrapper
def id?
self['anchor'].present?
@@ -282,6 +356,8 @@ def series
end
class Document < NodeWrapper
+ attr_accessor :href_resolver
+
def initialize(from)
super Nokogiri::XML(from)
scope '/rfc'
@@ -353,6 +429,7 @@ def keywords
all('./front/keyword/text()').map(&:text)
end
+ # TODO: add memoization
def anchor_map
all('.//*[@anchor]').each_with_object({}) do |node, map|
map[node['anchor']] = node
@@ -373,6 +450,15 @@ def lookup_anchor(name)
end
end
+ def href_for(target) # href for an xref target
+ if (target =~ /^[\w-]+:/) == 0 # target begins with a "scheme:"-like prefix, so it is returned unchanged
+ target
+ else
+ href_resolver && href_resolver.call(target) || # a missing resolver or a nil/false result falls through to the anchor below
+ ('#' + target.parameterize)
+ end
+ end
+
def references
all('./back/references/reference').map {|node| wrap(node, Reference) }
end
@@ -393,6 +479,8 @@ def class_attribute(names = classnames)
end
def render_inline(elements)
+ # Array() doesn't work with text node, for some reason
+ elements = [elements] unless Array === elements
elements.map do |el|
if el.is_a? Xref
link_to el.text, el.href
@@ -417,6 +505,10 @@ def link_to(text, href, classnames = nil)
def mail_to(email, text = email, classnames = nil)
link_to text, "mailto:#{email}", classnames
end
+
+ def debug(obj) # returns a <pre> element containing h(obj.inspect); h is presumably the HTML-escaping helper (confirm)
+ %(<pre>#{h obj.inspect}</pre>)
+ end
end
module TemplateHelpers
View
10 templates/definition_list.erb
@@ -0,0 +1,10 @@
+<% if elements.size > 1 %>
+<dl><% for item in elements %>
+ <dt><%= render_inline item.definition_title %></dt>
+ <dd><% for description in item.definition_description %>
+ <%= render description %>
+ <% end %></dd>
+<% end %></dl>
+<% else %>
+ <%= render self, 'text' %>
+<% end %>
View
18 templates/document.erb
@@ -1,19 +1,19 @@
<style>body { string-set: short-title "<%== (short_title || title).gsub('"', '\"') %>" }</style>
-<!--<meta name="keywords" content="<%== keywords.join(', ') %>">-->
<article>
<section class="titlepage">
- <% if number? %><p class="rfc">
- RFC <%== number %>
- <% if category? %>(<%== category %> document)<% end %>
- </p><% end %>
- <h1><%== title %></h1>
+ <div class=page-header>
+ <h1>
+ <%== title %>
+ <small>RFC&nbsp;<%== number %></small>
+ </h1>
+ </div>
<% if abstract? %><%= render abstract %><% end %>
<% if authors? %>
<section class="authors">
- <p>Authors:</p>
+ <h3>Authors:</h3>
<ol><% for author in authors %>
<li class="vcard">
<%= link_to author.name, author.url, %w[url fn] %><% if author.organization? %>,
@@ -27,7 +27,9 @@
</section>
<aside>
- <ol class=toc><% for section in sections %>
+ <ol class=toc>
+ <li>RFC&nbsp;<%== number %></li>
+ <% for section in sections %>
<li><a href="#<%= section.id %>"><%= section.title %></a></li>
<ol><% for subsection in section.sections %>
<li><a href="#<%= subsection.id %>"><%= subsection.title %></a></li>
View
8 templates/list.erb
@@ -1,9 +1,14 @@
+<% if note? %>
+ <div class=note>
+ <%= render self, 'text' %>
+ </div>
+<% else %>
<% case style %>
<% when 'symbols', 'empty' %>
<ul class="<%= style %>"><% for element in elements %>
<li><%= render element %></li>
<% end %></ul>
-<% when 'numbers', 'alpha' %>
+<% when 'numbers', 'alpha', 'letters', 'format' %>
<ol class="<%= style %>"><% for element in elements %>
<li><%= render element %></li>
<% end %></ol>
@@ -14,3 +19,4 @@
<% end %></dl>
<% else raise "unrecognized list style: #{style.inspect}" %>
<% end %>
+<% end %>
View
26 templates/table.erb
@@ -0,0 +1,26 @@
+<% if preamble? %>
+ <%= render preamble %>
+<% end %>
+
+<table class="table table-bordered">
+ <thead>
+ <tr>
+ <% for col in columns %>
+ <th><%== col.text %></th>
+ <% end %>
+ </tr>
+ </thead>
+ <tbody>
+ <% for row in rows %>
+ <tr>
+ <% for cell in row %>
+ <td><%= render cell %></td>
+ <% end %>
+ </tr>
+ <% end %>
+ </tbody>
+</table>
+
+<% if postamble? %>
+ <%= render postamble %>
+<% end %>
View
10 templates/text.erb
@@ -1,11 +1,11 @@
-<% if list? %>
+<% if respond_to? :list? and list? %>
<%= render list %>
<% else %>
- <% for block in blocks %>
- <% if block.is_a? Array %>
- <p<%= class_attribute %>><%= render_inline block %></p>
+ <% for element in elements %>
+ <% if element.is_a? Array %>
+ <p<%= class_attribute %>><%= render_inline element %></p>
<% else %>
- <%= render block %>
+ <%= render element %>
<% end %>
<% end %>
<% end %>
View
4 views/_toc.scss
@@ -30,9 +30,7 @@ article, .article {
.titlepage {
h2, h3, h4 { &::before { content: normal } }
}
- .titlepage + section, .titlepage + .section {
- counter-reset: chapter;
- }
+ .titlepage + * { counter-reset: chapter }
// poor man's nested counters
> section, > .section {
counter-reset: section;
View
8 views/search.erb
@@ -15,11 +15,11 @@
<% if @results.any? %>
<ol>
<% for rfc in @results %>
- <li<%= ' class=obsoleted' if rfc.obsoleted %>>
- <span class=document-id><%= display_document_id rfc %></span>
- <% if rfc.obsoleted %><span class="label label-warning">obsolete</span><% end %>
+ <li<%= ' class=obsoleted' if rfc.obsoleted? %>>
+ <a class=document-id href="<%= rfc_path rfc %>"><%= display_document_id rfc %></a>
+ <% if rfc.obsoleted? %><span class="label label-warning">obsolete</span><% end %>
<h2><a href="<%= rfc_path rfc %>"><%= rfc.title %></a></h2>
- <%= rfc.abstract.try(:sub, /\[STANDARDS[ -]{1,2}TRA?CK\]/, '') %>
+ <%= display_abstract rfc.abstract %>
</li>
<% end %>
</ol>
View
25 views/show.erb
@@ -0,0 +1,25 @@
+<% page_title '%s: %s' % [display_document_id(@rfc), @rfc.title] %>
+
+<% if @rfc.pretty? %>
+ <%= @rfc.body %>
+<% else %>
+ <div class=container>
+ <div class=page-header>
+ <h1>
+ <%== @rfc.title %>
+ <small><%= display_document_id @rfc %></small>
+ </h1>
+ </div>
+ <div class="alert alert-block">
+ <h4 class=alert-heading>Could not prettify this RFC</h4>
+ <p>The source XML for this RFC isn't available, therefore it can't be reformatted.</p>
+ </div>
+ <%= display_abstract @rfc.abstract %>
+
+ <ul class=unstyled>
+ <li>
+ <a class=btn href="<%= @rfc.external_url %>">Read the full version &rarr;</a>
+ </li>
+ </ul>
+ </div>
+<% end %>
View
86 views/style.sass
@@ -37,53 +37,55 @@ ul.empty
list-style: none
ol.alpha
list-style: upper-latin
+ol.letters
+ list-style: upper-roman
article
padding: $gridGutterWidth 0
position: relative
- h1
- border-bottom: 3px solid $grayLighter
- margin-bottom: $baseLineHeight
+ +screen(nonphone)
+ max-width: 50em
+ padding-left: $sidebarWidth + $gridGutterWidth
-aside
- +screen(phone)
- display: none
- ol
- margin: 0
- padding: 0
- list-style: none
- li
+ div.note
+ @extend .well
+
+ aside
+ +screen(phone)
+ display: none
+ +screen(nonphone)
+ display: block
+ width: $sidebarWidth
+ padding: $gridGutterWidth ($gridGutterWidth / 2)
+ border-right: 1px solid silver
+ position: fixed
+ top: 0
+ left: 0
+ bottom: 0
+ -webkit-box-sizing: border-box
+ -moz-box-sizing: border-box
+ box-sizing: border-box
+ overflow-y: auto
+ overflow-x: hidden
+ -webkit-overflow-scrolling: touch
+ > ol > li
+ font-weight: bold
+ ol
margin: 0
padding: 0
- ol
- list-style: square
- margin-left: $baseLineHeight
- margin-bottom: $baseLineHeight / 2
+ list-style: none
li
- color: $grayLight
- padding-top: $baseFontSize / 3
- line-height: $baseFontSize
- a
- color: $grayDark
- &:hover
- color: $grayDarker
-
-+screen(nonphone)
- article
- max-width: 50em
- padding-left: $sidebarWidth + $gridGutterWidth
- aside
- display: block
- width: $sidebarWidth
- padding: $gridGutterWidth ($gridGutterWidth / 2)
- border-right: 1px solid silver
- position: fixed
- top: 0
- left: 0
- bottom: 0
- -webkit-box-sizing: border-box
- -moz-box-sizing: border-box
- box-sizing: border-box
- overflow-y: auto
- overflow-x: hidden
- -webkit-overflow-scrolling: touch
+ margin: 0
+ padding: 0
+ ol
+ list-style: square
+ margin-left: $baseLineHeight
+ margin-bottom: $baseLineHeight / 2
+ li
+ color: $grayLight
+ padding-top: $baseFontSize / 3
+ line-height: $baseFontSize
+ a
+ color: $grayDark
+ &:hover
+ color: $grayDarker
Please sign in to comment.
Something went wrong with that request. Please try again.