release-0.3.3

jugyo · Jul 5, 2009 · 9df340b · 9df340b
1 parent 2139b9f
commit 9df340b
Show file tree

Hide file tree

Showing 10 changed files with 112 additions and 44 deletions.
diff --git a/History.txt b/History.txt
@@ -1,18 +1,34 @@
-== 0.2.1 / 2009-06-26
-
-* Initial release
+== 0.3.3 / 2009-07-05
 
-== 0.3.0 / 2009-06-28
+* New features
 
-* Switched to Nokogiri for HTML parsing
-* Better parsing for hierarchical TOCs
-* Many bug fixes
+    * Option to add external files to the generated ePub (e.g. cover images, logos)
+    * Option to insert HTML fragments before/after a specific element
+    * It is now possible to instruct repub to remove all links to CSS and <style> elements from the source doc
 
-== 0.3.1 / 2009-06-28
+* Bug fixes
 
-* Fixed App.data_path bug
+    * Metadata double namespace prefix
+    * Encoding autodetection is now done only once, after download (as it was supposed to be)
+    * -e flag actually works
+    * Source doc content-type encoding is now always set to UTF-8
+    * Fixed warnings in Profile helper under Ruby 1.9.1
 
 == 0.3.2 / 2009-06-30
 
 * Improved Win32 support
 * Updated documentation
+
+== 0.3.1 / 2009-06-28
+
+* Fixed App.data_path bug
+
+== 0.3.0 / 2009-06-28
+
+* Switched to Nokogiri for HTML parsing
+* Better parsing for hierarchical TOCs
+* Many bug fixes
+
+== 0.2.1 / 2009-06-26
+
+* Initial release
diff --git a/README.rdoc b/README.rdoc
@@ -67,7 +67,7 @@ For example, if you later decide to regenerate Git Manual ePub without TOC at th
 
     repub -l git-manual -X '//div[@class="toc"]' http://www.kernel.org/pub/software/scm/git/docs/user-manual.html
 
-A few more examples:
+Few more examples:
 
 * GNU Wget Manual
 
@@ -142,6 +142,10 @@ Also, the following tools must be somewhere in $PATH:
 Currently, only "everything-on-one-page" HTML sources are supported. Repub will download and process all page requisites
 (stylesheets and images) but all actual content must be on one page.
 
+Encoding auto-detection is slow.
+
+Chardet 0.9.0 is broken under Ruby 1.9.
+
 Bugs: probably. If you find any, please report them to dg at invisiblellama dot net.
 
 == INSTALL:

diff --git a/Rakefile b/Rakefile
@@ -1,4 +1,5 @@
 begin
+  require 'rubygems'
   require 'bones'
   Bones.setup
 rescue LoadError

diff --git a/lib/repub.rb b/lib/repub.rb
@@ -1,7 +1,7 @@
 module Repub
 
   # :stopdoc:
-  VERSION = '0.3.2'
+  VERSION = '0.3.3'
   LIBPATH = File.expand_path(File.dirname(__FILE__)) + File::SEPARATOR
   PATH = File.dirname(LIBPATH) + File::SEPARATOR
   # :startdoc:

diff --git a/lib/repub/app/builder.rb b/lib/repub/app/builder.rb
@@ -162,7 +162,7 @@ def postprocess_doc(asset)
             if @options[:css] == '-'
               # Also remove all inline styles
               doc.xpath('//head/style').remove
-              log.debug "-- Removing all stylesheet links and style elements"
+              log.info "Removing all stylesheet links and style elements"
             else
               # Add custom stylesheet link
               link = Nokogiri::XML::Node.new('link', doc)
@@ -171,7 +171,7 @@ def postprocess_doc(asset)
               link['href'] = File.basename(@options[:css])
               # Add as the last child so it has precedence over (possible) inline styles before
               doc.at('//head').add_child(link)
-              log.debug "-- Replacing CSS refs with #{link['href']}"
+              log.info "Replacing CSS refs with \"#{link['href']}\""
             end
           end
 
@@ -181,17 +181,17 @@ def postprocess_doc(asset)
             fragment = e[selector]
             element = doc.xpath(selector).first
             if element
-              element.add_next_sibling(fragment)
               log.info "Inserting fragment \"#{fragment.to_html}\" after \"#{selector}\""
+              fragment.children.to_a.reverse.each {|node| element.add_next_sibling(node) }
             end
           end if @options[:after]
           @options[:before].each do |e|
             selector = e.keys.first
             fragment = e[selector]
             element = doc.xpath(selector).first
             if element
-              element.add_previous_sibling(fragment)
               log.info "Inserting fragment \"#{fragment}\" before \"#{selector}\""
+              fragment.children.to_a.each {|node| element.add_previous_sibling(node) }
             end
           end if @options[:before]
 
@@ -203,7 +203,7 @@ def postprocess_doc(asset)
 
           # Save processed doc
           File.open(asset, 'w') do |f|
-            if @options[:fixup]
+            if @options[:fixup] || true
               # HACK: Nokogiri seems to ignore the fact that xmlns and other attrs are already present
               # in the html node and adds them anyway. Just remove them here to avoid duplicates.
               doc.root.attributes.each {|name, value| doc.root.remove_attribute(name) }

diff --git a/lib/repub/app/fetcher.rb b/lib/repub/app/fetcher.rb
@@ -31,7 +31,7 @@ class Fetcher
 
         Downloaders = {
           :wget     => { :cmd => 'wget', :options => '-nv -E -H -k -p -nH -nd' },
-          :httrack  => { :cmd => 'httrack', :options => '-gB -r2 +*.css +*.jpg -*.xml -*.html' }
+          :httrack  => { :cmd => 'httrack', :options => '-gBqQ -r2 +*.css +*.jpg -*.xml -*.html' }
         }
 
         def initialize(options)

diff --git a/repub.gemspec b/repub.gemspec
@@ -2,11 +2,11 @@
 
 Gem::Specification.new do |s|
   s.name = %q{repub}
-  s.version = "0.3.2"
+  s.version = "0.3.3"
 
   s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
   s.authors = ["Dmitri Goutnik"]
-  s.date = %q{2009-06-30}
+  s.date = %q{2009-07-05}
   s.default_executable = %q{repub}
   s.description = %q{Repub is a simple HTML to ePub converter.
 
@@ -16,7 +16,7 @@ ePub documents.}
   s.email = %q{dg@invisiblellama.net}
   s.executables = ["repub"]
   s.extra_rdoc_files = ["History.txt", "README.rdoc", "bin/repub"]
-  s.files = ["History.txt", "README.rdoc", "Rakefile", "TODO", "bin/repub", "lib/repub.rb", "lib/repub/app.rb", "lib/repub/app/builder.rb", "lib/repub/app/fetcher.rb", "lib/repub/app/logger.rb", "lib/repub/app/options.rb", "lib/repub/app/parser.rb", "lib/repub/app/profile.rb", "lib/repub/app/utility.rb", "lib/repub/epub.rb", "lib/repub/epub/container.rb", "lib/repub/epub/content.rb", "lib/repub/epub/toc.rb", "repub.gemspec", "test/epub/test_container.rb", "test/epub/test_content.rb", "test/epub/test_toc.rb", "test/test_builder.rb", "test/test_fetcher.rb", "test/test_logger.rb", "test/test_parser.rb"]
+  s.files = ["History.txt", "README.rdoc", "Rakefile", "TODO", "bin/repub", "lib/repub.rb", "lib/repub/app.rb", "lib/repub/app/builder.rb", "lib/repub/app/fetcher.rb", "lib/repub/app/logger.rb", "lib/repub/app/options.rb", "lib/repub/app/parser.rb", "lib/repub/app/profile.rb", "lib/repub/app/utility.rb", "lib/repub/epub.rb", "lib/repub/epub/container.rb", "lib/repub/epub/content.rb", "lib/repub/epub/toc.rb", "repub.gemspec", "test/data/custom.css", "test/data/invisiblellama.png", "test/data/test.css", "test/data/test.html", "test/epub/test_container.rb", "test/epub/test_content.rb", "test/epub/test_toc.rb", "test/test_builder.rb", "test/test_fetcher.rb", "test/test_logger.rb", "test/test_parser.rb"]
   s.homepage = %q{http://rubyforge.org/projects/repub/}
   s.rdoc_options = ["--main", "README.rdoc"]
   s.require_paths = ["lib"]

diff --git a/test/data/test.html b/test/data/test.html
@@ -1,8 +1,8 @@
 <html>
 <head>
 <title>Test Page</title>
-<link rel='stylesheet' type='text/css' href='test.css'>
-<style>
+<link rel='stylesheet' type='text/css' href='test.css'/>
+<style type='text/css'>
 h1 {
   font-size: 4em;
 }
@@ -19,42 +19,42 @@
 
 <body>
   <div class='img'>
-    <img src='invisiblellama.png'>
+    <img src='invisiblellama.png' alt='invisible llama'/>
   </div>
 
   <h1>Lorem Ipsum</h1>
 
   <ul>
     <li>
-      <a href='#1'>Chapter 1</a>
+      <a href='#c1'>Chapter 1</a>
       <ul>
-        <li><a href='#11'>Chapter 1.1</a></li>
-        <li><a href='#12'>Chapter 1.2</a></li>
+        <li><a href='#c11'>Chapter 1.1</a></li>
+        <li><a href='#c12'>Chapter 1.2</a></li>
       </ul>
     </li>
     <li>
-      <a href='#2'>Chapter 2</a>
+      <a href='#c2'>Chapter 2</a>
       <ul>
-        <li><a href='#21'>Chapter 2.1</a></li>
+        <li><a href='#c21'>Chapter 2.1</a></li>
       </ul>
     </li>
     <li>
-      <a href='#3'>Chapter 3</a>
+      <a href='#c3'>Chapter 3</a>
     </li>
   </ul>
 
-  <a id='1'><h1>Chapter 1</h1></a>
+  <h1><a id='c1'/>Chapter 1</h1>
   <p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
   <p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
-  <a id='11'><h3>Chapter 1.1</h3></a>
+  <h3><a id='c11'/>Chapter 1.1</h3>
   <p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
-  <a id='12'><h3>Chapter 1.2</h3></a>
+  <h3><a id='c12'/>Chapter 1.2</h3>
   <p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
-  <a id='2'><h1>Chapter 2</h1></a>
+  <h1><a id='c2'/>Chapter 2</h1>
   <p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
-  <a id='21'><h3>Chapter 2.1</h3></a>
+  <h3><a id='c21'/>Chapter 2.1</h3>
   <p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
-  <a id='3'><h1>Chapter 3</h1></a>
+  <h1><a id='c3'/>Chapter 3</h1>
   <p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
 </body>
 </html>
diff --git a/test/test_builder.rb b/test/test_builder.rb
@@ -49,7 +49,6 @@ def test_rx
     builder = build(parse(fetch))
     doc_path = builder.document_path
     doc_text = IO.read(doc_path)
-    #p doc_text
     assert(doc_text =~ /Retpahc/ && doc_text !~ /Chapter/)
     assert(doc_text =~ /<h2>/ && doc_text !~ /<h1>/)
     assert(doc_text =~ /<\/h2>/ && doc_text !~ /<\/h1>/)
@@ -61,7 +60,6 @@ def test_custom_css
     builder = build(parse(fetch))
     doc_path = builder.document_path
     doc_text = IO.read(doc_path)
-    #p doc_text
     doc = Nokogiri::HTML.parse(doc_text, nil, 'UTF-8')
     links = doc.xpath('//head/link[@rel="stylesheet"]')
     # we have single link
@@ -78,7 +76,6 @@ def test_removing_styles
     builder = build(parse(fetch))
     doc_path = builder.document_path
     doc_text = IO.read(doc_path)
-    p doc_text
     doc = Nokogiri::HTML.parse(doc_text, nil, 'UTF-8')
     links = doc.xpath('//head/link[@rel="stylesheet"]')
     # no stylesheet links
@@ -88,18 +85,68 @@ def test_removing_styles
     assert_equal(0, styles.size)
   end
 
+  # First sibling after +el+ that is not a text node; nil when there is none.
+  def next_nontext_sibling(el)
+    el = el.next_sibling
+    el = el.next_sibling while el && el.text?
+    el
+  end
+
+  # First sibling before +el+ that is not a text node; nil when there is none.
+  def previous_nontext_sibling(el)
+    el = el.previous_sibling
+    el = el.previous_sibling while el && el.text?
+    el
+  end
+
   def test_inserting_elements_after
     selector1 = '//ul'
     fragment1 = Nokogiri::HTML.fragment('<p>blah</p>')
     selector2 = '//p[last()]'
-    fragment2 = Nokogiri::HTML.fragment('<span>bleh</span>')
-    @options[:after] = [{ selector1 => fragment1}, {selector2 => fragment2}]
+    fragment2 = Nokogiri::HTML.fragment('<span>bleh</span><div>boo</div>')
+    @options[:after] = [{ selector1 => fragment1.clone}, {selector2 => fragment2.clone}]
     builder = build(parse(fetch))
     doc_path = builder.document_path
     doc_text = IO.read(doc_path)
-    #p doc_text
     doc = Nokogiri::HTML.parse(doc_text, nil, 'UTF-8')
-    p doc
-    assert_equal(fragment1.to_html, doc.xpath(selector1).first.next_sibling.to_html)
+    el = next_nontext_sibling(doc.at(selector1))
+    assert_equal(fragment1.children[0].to_s.strip, el.to_s.strip)
+    # first fragment node
+    el = next_nontext_sibling(doc.at(selector2))
+    assert_equal(fragment2.children[0].to_s.strip, el.to_s.strip)
+    # second fragment node
+    el = next_nontext_sibling(el)
+    assert_equal(fragment2.children[1].to_s.strip, el.to_s.strip)
+  end
+
+  def test_inserting_elements_before # fragments given in @options[:before] must end up directly before their selectors, in original order
+    selector1 = '//a[@id="c11"]'
+    fragment1 = Nokogiri::HTML.fragment('<h4>blah</h4><div>boo</div>')
+    selector2 = '//p[position()=5]'
+    fragment2 = Nokogiri::HTML.fragment('<div>test</div>')
+    @options[:before] = [{ selector1 => fragment1.clone}, {selector2 => fragment2.clone}] # clones go into the options; the originals are kept for the comparisons below
+    builder = build(parse(fetch))
+    doc_path = builder.document_path
+    doc_text = IO.read(doc_path)
+    doc = Nokogiri::HTML.parse(doc_text, nil, 'UTF-8')
+    # walking backwards from the target, the nearest element must be the fragment's last node
+    el = previous_nontext_sibling(doc.at(selector1))
+    assert_equal(fragment1.children[1].to_s.strip, el.to_s.strip)
+    # one step further back must be the fragment's first node
+    el = previous_nontext_sibling(el)
+    assert_equal(fragment1.children[0].to_s.strip, el.to_s.strip)
+    el = previous_nontext_sibling(doc.at(selector2))
+    assert_equal(fragment2.children[0].to_s.strip, el.to_s.strip)
+  end
+
+  def test_remove_elements # every selector listed in @options[:remove] must match nothing in the built document
+    @options[:remove] = ['ul', '//a[@id="c2"]', 'div[@class="img"]']
+    builder = build(parse(fetch))
+    doc_path = builder.document_path
+    doc_text = IO.read(doc_path)
+    doc = Nokogiri::HTML.parse(doc_text, nil, 'UTF-8')
+    @options[:remove].each do |selector|
+      assert_equal(0, doc.xpath(selector).size) # NOTE(review): 'ul' and 'div[...]' are relative XPath evaluated from the document node here - confirm they can match at all, otherwise these checks pass vacuously
+    end
   end
 end
diff --git a/test/test_parser.rb b/test/test_parser.rb
@@ -39,7 +39,7 @@ def test_parser
     assert_equal(2, parser.toc[0].subitems.size)
     assert_equal('Chapter 1.2', parser.toc[0].subitems[1].title)
     assert_equal(cache.assets[:documents][0], parser.toc[0].subitems[1].uri)
-    assert_equal('12', parser.toc[0].subitems[1].fragment_id)
+    assert_equal('c12', parser.toc[0].subitems[1].fragment_id)
   end
 
 end