Permalink
Browse files

Merge branch 'hpricot'

  • Loading branch information...
2 parents a20d014 + ed7987b commit f1b6c5e641a77be9f505899ee10d5edd562b3fbc @rtomayko committed Mar 8, 2010
Showing with 159 additions and 149 deletions.
  1. +1 −1 Rakefile
  2. +2 −1 bin/ron
  3. +14 −15 lib/ron/document.rb
  4. +119 −109 lib/ron/roff.rb
  5. +5 −4 man/ron.1
  6. +4 −3 man/ron.1.ron
  7. +2 −2 ron.gemspec
  8. +4 −3 test/angle_bracket_syntax.html
  9. +1 −0 test/basic_document.html
  10. +1 −0 test/custom_title_document.html
  11. +6 −11 test/definition_list_syntax.html
View
@@ -3,7 +3,7 @@ require 'rake/clean'
task :default => :test
task :environment do
- require_library 'nokogiri'
+ require_library 'hpricot'
require_library 'rdiscount'
ENV['RUBYLIB'] = "#{$:.join(':')}:#{ENV['RUBYLIB']}"
ENV['PATH'] = "bin:#{ENV['PATH']}"
View
@@ -88,7 +88,8 @@ pid = nil
begin
require 'ron'
-rescue LoadError
+rescue LoadError => boom
+ raise unless boom.to_s.include?('ron')
$:.unshift File.dirname(__FILE__) + "../lib"
require 'ron'
end
View
@@ -1,5 +1,5 @@
require 'set'
-require 'nokogiri'
+require 'hpricot'
require 'rdiscount'
require 'ron/roff'
@@ -188,21 +188,21 @@ def definition_list_filter(html)
# process all unordered lists depth-first
doc.search('ul').to_a.reverse.each do |ul|
items = ul.search('li')
- next if items.any? { |item| item.text.split("\n", 2).first !~ /:$/ }
+ next if items.any? { |item| item.inner_text.split("\n", 2).first !~ /:$/ }
ul.name = 'dl'
items.each do |item|
- if item.child.name == 'p'
+ if child = item.at('p')
wrap = '<p></p>'
- container = item.child
+ container = child
else
wrap = '<dd></dd>'
container = item
end
term, definition = container.inner_html.split(":\n", 2)
- dt = item.before("<dt>#{term}</dt>").previous_sibling
- dt['class'] = 'flush' if dt.content.length <= 7
+ dt = item.before("<dt>#{term}</dt>").first
+ dt.attributes['class'] = 'flush' if dt.inner_text.length <= 7
item.name = 'dd'
container.swap(wrap.sub(/></, ">#{definition}<"))
@@ -216,14 +216,13 @@ def angle_quote_post_filter(html)
doc = parse_html(html)
# convert all angle quote vars nested in code blocks
# back to the original text
- doc.search('code text()').each do |node|
- next unless node.to_s.include?('var&gt;')
- new = node.document.create_text_node(
- node.to_s.
- gsub('&lt;var&gt;', '<').
+ doc.search('code').search('text()').each do |node|
+ next unless node.to_html.include?('var&gt;')
+ new =
+ node.to_html.
+ gsub('&lt;var&gt;', '&lt;').
gsub("&lt;/var&gt;", '>')
- )
- node.replace(new)
+ node.swap(new)
end
doc
end
@@ -279,10 +278,10 @@ def angle_quote_pre_filter(data)
private
def parse_html(html)
- if html.kind_of?(Nokogiri::HTML::DocumentFragment)
+ if html.respond_to?(:doc?) && html.doc?
html
else
- Nokogiri::HTML.fragment(html.to_s)
+ Hpricot(html.to_s)
end
end
end
View
@@ -1,12 +1,13 @@
-require 'nokogiri'
+require 'hpricot'
module Ron
class RoffFilter
# Convert Ron HTML to roff.
def initialize(html, name, section, tagline, manual=nil, version=nil, date=nil)
@buf = []
title_heading name, section, tagline, manual, version, date
- block_filter(Nokogiri::HTML.fragment(html))
+ html = Hpricot(html)
+ block_filter(html)
write "\n"
end
@@ -15,149 +16,158 @@ def to_s
end
protected
+ def previous(node)
+ if node.respond_to?(:previous)
+ prev = node.previous
+ prev = prev.previous until prev.nil? || prev.elem?
+ prev
+ end
+ end
+
def title_heading(name, section, tagline, manual, version, date)
comment "generated with Ron/v#{Ron::VERSION}"
comment "http://github.com/rtomayko/ron/"
macro "TH", %["#{escape(name.upcase)}" "#{section}" "#{date.strftime('%B %Y')}" "#{version}" "#{manual}"]
end
def block_filter(node)
- if node.kind_of?(Nokogiri::XML::NodeSet)
+ if node.kind_of?(Array) || node.kind_of?(Hpricot::Elements)
node.each { |ch| block_filter(ch) }
- return
- end
- prev = node.previous_sibling
- prev = prev.previous_sibling until prev.nil? || prev.element?
+ elsif node.doc?
+ block_filter(node.children)
- case node.name
+ elsif node.text?
+ return if node.to_html =~ /^\s*$/m
+ warn "unexpected text: %p", node
+
+ elsif node.elem?
+ case node.name
+ when 'h2'
+ macro "SH", quote(escape(node.html))
+ when 'h3'
+ macro "SS", quote(escape(node.html))
+
+ when 'p'
+ prev = previous(node)
+ if prev && %w[dd li].include?(node.parent.name)
+ macro "IP"
+ elsif prev && !%w[h1 h2 h3].include?(prev.name)
+ macro "P"
+ end
+ inline_filter(node.children)
- # non-element nodes
- when '#document-fragment'
- block_filter(node.children)
- when 'text'
- return if node.text =~ /^\s*$/m
- warn "unexpected text: %p", node.text
-
- # headings
- when 'h2'
- macro "SH", quote(escape(node.content))
- when 'h3'
- macro "SS", quote(escape(node.content))
-
- # paragraphs
- when 'p'
- if prev && %w[dd li].include?(node.parent.name)
- macro "IP"
- elsif prev && !%w[h1 h2 h3].include?(prev.name)
- macro "P"
- end
- inline_filter(node.children)
- when 'pre'
- indent = prev.nil? || !%w[h1 h2 h3].include?(prev.name)
- macro "IP", %w["" 4] if indent
- macro "nf"
- write "\n"
- inline_filter(node.search('code').children, decode_entities=false)
- macro "fi"
- macro "IP", %w["" 0] if indent
-
- # definition lists
- when 'dl'
- macro "TP"
- block_filter(node.children)
- when 'dt'
- macro "TP" unless prev.nil?
- inline_filter(node.children)
- write "\n"
- when 'dd'
- if node.search('p').any?
+ when 'pre'
+ prev = previous(node)
+ indent = prev.nil? || !%w[h1 h2 h3].include?(prev.name)
+ macro "IP", %w["" 4] if indent
+ macro "nf"
+ write "\n"
+ inline_filter(node.search('code > *'))
+ macro "fi"
+ macro "IP", %w["" 0] if indent
+
+ when 'dl'
+ macro "TP"
block_filter(node.children)
- else
+ when 'dt'
+ prev = previous(node)
+ macro "TP" unless prev.nil?
inline_filter(node.children)
- end
- write "\n"
-
- # ordered/unordered lists
- # when 'ol'
- # macro "IP", '1.'
- # block_filter(node.children)
- when 'ul'
- block_filter(node.children)
- macro "IP", %w["" 0]
- when 'li'
- case node.parent.name
+ write "\n"
+ when 'dd'
+ if node.search('p').any?
+ block_filter(node.children)
+ else
+ inline_filter(node.children)
+ end
+ write "\n"
+
+ # when 'ol'
+ # macro "IP", '1.'
+ # block_filter(node.children)
when 'ul'
- macro "IP", %w["\(bu" 4]
- end
- if node.search('p', 'ol', 'ul', 'dl', 'div').any?
block_filter(node.children)
+ macro "IP", %w["" 0]
+ when 'li'
+ case node.parent.name
+ when 'ul'
+ macro "IP", %w["\(bu" 4]
+ end
+ if node.search('p|ol|ul|dl|div').any?
+ block_filter(node.children)
+ else
+ inline_filter(node.children)
+ end
+ write "\n"
+
else
- inline_filter(node.children)
+ warn "unrecognized block tag: %p", node.name
end
- write "\n"
else
- warn "unrecognized block tag: %p", node.name
+ fail "unexpected node: #{node.inspect}"
end
end
- def inline_filter(node, decode_entities=true)
- if node.kind_of?(Nokogiri::XML::NodeSet)
- node.each { |ch| inline_filter(ch, decode_entities) }
- return
- end
-
- prev = node.previous_sibling
- prev = prev.previous_sibling until prev.nil? || prev.element?
+ def inline_filter(node)
+ if node.kind_of?(Array) || node.kind_of?(Hpricot::Elements)
+ node.each { |ch| inline_filter(ch) }
- case node.name
- when 'text'
- text = node.content.dup
+ elsif node.text?
+ prev = previous(node)
+ text = node.to_html.dup
text.sub!(/^\n+/m, '') if prev && prev.name == 'br'
- if node.previous_sibling.nil? && node.next_sibling
+ if node.previous.nil? && node.next
text.sub!(/\n+$/m, '')
else
text.sub!(/\n+$/m, ' ')
end
- write escape(text, decode_entities)
- when 'code'
- write '\fB'
- inline_filter(node.children, decode_entities=false)
- write '\fR'
- when 'b', 'strong', 'kbd', 'samp'
- write '\fB'
- inline_filter(node.children)
- write '\fR'
- when 'var', 'em', 'i', 'u'
- write '\fI'
- inline_filter(node.children)
- write '\fR'
- when 'br'
- macro 'br'
- when 'a'
- write '\fI'
- inline_filter(node.children)
- write '\fR'
+ write escape(text)
+
+ elsif node.elem?
+ case node.name
+ when 'code'
+ write '\fB'
+ inline_filter(node.children)
+ write '\fR'
+
+ when 'b', 'strong', 'kbd', 'samp'
+ write '\fB'
+ inline_filter(node.children)
+ write '\fR'
+
+ when 'var', 'em', 'i', 'u'
+ write '\fI'
+ inline_filter(node.children)
+ write '\fR'
+
+ when 'br'
+ macro 'br'
+ when 'a'
+ write '\fI'
+ inline_filter(node.children)
+ write '\fR'
+ else
+ warn "unrecognized inline tag: %p", node.name
+ end
+
else
- warn "unrecognized inline tag: %p", node.name
+ fail "unexpected node: #{node.inspect}"
end
end
def macro(name, value=nil)
writeln ".\n.#{[name, value].compact.join(' ')}"
end
- def escape(text, decode_entities=true)
- text = text.gsub(/[\\-]/) { |m| "\\#{m}" }
- if decode_entities
- text = text.
- gsub('&nbsp;', ' ').
- gsub('&lt;', '<').
- gsub('&gt;', '>').
- gsub('&amp;', '&')
- end
- text
+ def escape(text)
+ text.
+ gsub(/[\\-]/) { |m| "\\#{m}" }.
+ gsub('&nbsp;', ' ').
+ gsub('&lt;', '<').
+ gsub('&gt;', '>').
+ gsub('&amp;', '&')
end
def quote(text)
View
@@ -1,7 +1,7 @@
.\" generated with Ron/v0.3
.\" http://github.com/rtomayko/ron/
.
-.TH "RON" "1" "December 2009" "Ryan Tomayko" "Ron Manual"
+.TH "RON" "1" "March 2010" "Ryan Tomayko" "Ron Manual"
.
.SH "NAME"
\fBron\fR \-\- build markdown\-based man pages
@@ -209,9 +209,10 @@ something like 'less \-is'.
Used instead of \fBMANPAGER\fR when \fBMANPAGER\fR is not defined.
.
.SH "BUGS"
-Ron is written in Ruby and depends on nokogiri and rdiscount, native
-extension libraries that are non\-trivial to install on some systems. A
-more portable version of this program would be welcome.
+Ron is written in Ruby and depends on hpricot and rdiscount,
+extension libraries that are non\-trivial to install on some
+systems. A more portable version of this program would be
+welcome.
.
.SH "COPYRIGHT"
Ron is Copyright (C) 2009 Ryan Tomayko <tomayko.com/about>
Oops, something went wrong.

0 comments on commit f1b6c5e

Please sign in to comment.