Skip to content

Commit

Permalink
release-0.3.3
Browse files Browse the repository at this point in the history
  • Loading branch information
Dmitri Goutnik committed Jul 5, 2009
1 parent 2139b9f commit 9df340b
Show file tree
Hide file tree
Showing 10 changed files with 112 additions and 44 deletions.
34 changes: 25 additions & 9 deletions History.txt
@@ -1,18 +1,34 @@
== 0.2.1 / 2009-06-26

* Initial release
== 0.3.3 / 2009-07-05

== 0.3.0 / 2009-06-28
* New features

* Switched to Nokogiri for HTML parsing
* Better parsing for hierarchical TOCs
* Many bug fixes
* Option to add external files to the generated ePub (e.g. cover images, logos etc)
* Option to insert HTML fragments before/after specific element
* It is now possible to instruct repub to remove all links to CSS and <style> elements from source doc

== 0.3.1 / 2009-06-28
* Bug fixes

* Fixed App.data_path bug
* Metadata double namespace prefix
* Encoding autodetection is now done only once, after download (as it was supposed to be)
* -e flag actually works
* Source doc content-type encoding is now always set to utf-8
* Fixed warnings in Profile helper under Ruby 1.9.1

== 0.3.2 / 2009-06-30

* Improved Win32 support
* Updated documentation

== 0.3.1 / 2009-06-28

* Fixed App.data_path bug

== 0.3.0 / 2009-06-28

* Switched to Nokogiri for HTML parsing
* Better parsing for hierarchical TOCs
* Many bug fixes

== 0.2.1 / 2009-06-26

* Initial release
6 changes: 5 additions & 1 deletion README.rdoc
Expand Up @@ -67,7 +67,7 @@ For example, if you later decide to regenerate Git Manual ePub without TOC at th

repub -l git-manual -X '//div[@class="toc"]' http://www.kernel.org/pub/software/scm/git/docs/user-manual.html

A few more examples:
Few more examples:

* GNU Wget Manual

Expand Down Expand Up @@ -142,6 +142,10 @@ Also, the following tools must be somewhere in $PATH:
Currently, only "everything-on-one-page" HTML sources are supported. Repub will download and process all page requisites
(stylesheets and images) but all actual content must be on one page.

Encoding auto-detection is slow.

Chardet 0.9.0 is broken under Ruby 1.9.

Bugs: probably. If you find any, please report them to dg at invisiblellama dot net.

== INSTALL:
Expand Down
1 change: 1 addition & 0 deletions Rakefile
@@ -1,4 +1,5 @@
begin
require 'rubygems'
require 'bones'
Bones.setup
rescue LoadError
Expand Down
2 changes: 1 addition & 1 deletion lib/repub.rb
@@ -1,7 +1,7 @@
module Repub

# :stopdoc:
VERSION = '0.3.2'
VERSION = '0.3.3'
LIBPATH = File.expand_path(File.dirname(__FILE__)) + File::SEPARATOR
PATH = File.dirname(LIBPATH) + File::SEPARATOR
# :startdoc:
Expand Down
10 changes: 5 additions & 5 deletions lib/repub/app/builder.rb
Expand Up @@ -162,7 +162,7 @@ def postprocess_doc(asset)
if @options[:css] == '-'
# Also remove all inline styles
doc.xpath('//head/style').remove
log.debug "-- Removing all stylesheet links and style elements"
log.info "Removing all stylesheet links and style elements"
else
# Add custom stylesheet link
link = Nokogiri::XML::Node.new('link', doc)
Expand All @@ -171,7 +171,7 @@ def postprocess_doc(asset)
link['href'] = File.basename(@options[:css])
# Add as the last child so it has precedence over (possible) inline styles before
doc.at('//head').add_child(link)
log.debug "-- Replacing CSS refs with #{link['href']}"
log.info "Replacing CSS refs with \"#{link['href']}\""
end
end

Expand All @@ -181,17 +181,17 @@ def postprocess_doc(asset)
fragment = e[selector]
element = doc.xpath(selector).first
if element
element.add_next_sibling(fragment)
log.info "Inserting fragment \"#{fragment.to_html}\" after \"#{selector}\""
fragment.children.to_a.reverse.each {|node| element.add_next_sibling(node) }
end
end if @options[:after]
@options[:before].each do |e|
selector = e.keys.first
fragment = e[selector]
element = doc.xpath(selector).first
if element
element.add_previous_sibling(fragment)
log.info "Inserting fragment \"#{fragment}\" before \"#{selector}\""
fragment.children.to_a.each {|node| element.add_previous_sibling(node) }
end
end if @options[:before]

Expand All @@ -203,7 +203,7 @@ def postprocess_doc(asset)

# Save processed doc
File.open(asset, 'w') do |f|
if @options[:fixup]
if @options[:fixup] || true
# HACK: Nokogiri seems to ignore the fact that xmlns and other attrs are already present
# in the html node and adds them anyway. Just remove them here to avoid duplicates.
doc.root.attributes.each {|name, value| doc.root.remove_attribute(name) }
Expand Down
2 changes: 1 addition & 1 deletion lib/repub/app/fetcher.rb
Expand Up @@ -31,7 +31,7 @@ class Fetcher

Downloaders = {
:wget => { :cmd => 'wget', :options => '-nv -E -H -k -p -nH -nd' },
:httrack => { :cmd => 'httrack', :options => '-gB -r2 +*.css +*.jpg -*.xml -*.html' }
:httrack => { :cmd => 'httrack', :options => '-gBqQ -r2 +*.css +*.jpg -*.xml -*.html' }
}

def initialize(options)
Expand Down
6 changes: 3 additions & 3 deletions repub.gemspec
Expand Up @@ -2,11 +2,11 @@

Gem::Specification.new do |s|
s.name = %q{repub}
s.version = "0.3.2"
s.version = "0.3.3"

s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
s.authors = ["Dmitri Goutnik"]
s.date = %q{2009-06-30}
s.date = %q{2009-07-05}
s.default_executable = %q{repub}
s.description = %q{Repub is a simple HTML to ePub converter.
Expand All @@ -16,7 +16,7 @@ ePub documents.}
s.email = %q{dg@invisiblellama.net}
s.executables = ["repub"]
s.extra_rdoc_files = ["History.txt", "README.rdoc", "bin/repub"]
s.files = ["History.txt", "README.rdoc", "Rakefile", "TODO", "bin/repub", "lib/repub.rb", "lib/repub/app.rb", "lib/repub/app/builder.rb", "lib/repub/app/fetcher.rb", "lib/repub/app/logger.rb", "lib/repub/app/options.rb", "lib/repub/app/parser.rb", "lib/repub/app/profile.rb", "lib/repub/app/utility.rb", "lib/repub/epub.rb", "lib/repub/epub/container.rb", "lib/repub/epub/content.rb", "lib/repub/epub/toc.rb", "repub.gemspec", "test/epub/test_container.rb", "test/epub/test_content.rb", "test/epub/test_toc.rb", "test/test_builder.rb", "test/test_fetcher.rb", "test/test_logger.rb", "test/test_parser.rb"]
s.files = ["History.txt", "README.rdoc", "Rakefile", "TODO", "bin/repub", "lib/repub.rb", "lib/repub/app.rb", "lib/repub/app/builder.rb", "lib/repub/app/fetcher.rb", "lib/repub/app/logger.rb", "lib/repub/app/options.rb", "lib/repub/app/parser.rb", "lib/repub/app/profile.rb", "lib/repub/app/utility.rb", "lib/repub/epub.rb", "lib/repub/epub/container.rb", "lib/repub/epub/content.rb", "lib/repub/epub/toc.rb", "repub.gemspec", "test/data/custom.css", "test/data/invisiblellama.png", "test/data/test.css", "test/data/test.html", "test/epub/test_container.rb", "test/epub/test_content.rb", "test/epub/test_toc.rb", "test/test_builder.rb", "test/test_fetcher.rb", "test/test_logger.rb", "test/test_parser.rb"]
s.homepage = %q{http://rubyforge.org/projects/repub/}
s.rdoc_options = ["--main", "README.rdoc"]
s.require_paths = ["lib"]
Expand Down
30 changes: 15 additions & 15 deletions test/data/test.html
@@ -1,8 +1,8 @@
<html>
<head>
<title>Test Page</title>
<link rel='stylesheet' type='text/css' href='test.css'>
<style>
<link rel='stylesheet' type='text/css' href='test.css'/>
<style type='text/css'>
h1 {
font-size: 4em;
}
Expand All @@ -19,42 +19,42 @@

<body>
<div class='img'>
<img src='invisiblellama.png'>
<img src='invisiblellama.png' alt='invisible llama'/>
</div>

<h1>Lorem Ipsum</h1>

<ul>
<li>
<a href='#1'>Chapter 1</a>
<a href='#c1'>Chapter 1</a>
<ul>
<li><a href='#11'>Chapter 1.1</a></li>
<li><a href='#12'>Chapter 1.2</a></li>
<li><a href='#c11'>Chapter 1.1</a></li>
<li><a href='#c12'>Chapter 1.2</a></li>
</ul>
</li>
<li>
<a href='#2'>Chapter 2</a>
<a href='#c2'>Chapter 2</a>
<ul>
<li><a href='#21'>Chapter 2.1</a></li>
<li><a href='#c21'>Chapter 2.1</a></li>
</ul>
</li>
<li>
<a href='#3'>Chapter 3</a>
<a href='#c3'>Chapter 3</a>
</li>
</ul>

<a id='1'><h1>Chapter 1</h1></a>
<h1><a id='c1'/>Chapter 1</h1>
<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
<a id='11'><h3>Chapter 1.1</h3></a>
<h3><a id='c11'/>Chapter 1.1</h3>
<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
<a id='12'><h3>Chapter 1.2</h3></a>
<h3><a id='c12'/>Chapter 1.2</h3>
<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
<a id='2'><h1>Chapter 2</h1></a>
<h1><a id='c2'/>Chapter 2</h1>
<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
<a id='21'><h3>Chapter 2.1</h3></a>
<h3><a id='c21'/>Chapter 2.1</h3>
<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
<a id='3'><h1>Chapter 3</h1></a>
<h1><a id='c3'/>Chapter 3</h1>
<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
</body>
</html>
63 changes: 55 additions & 8 deletions test/test_builder.rb
Expand Up @@ -49,7 +49,6 @@ def test_rx
builder = build(parse(fetch))
doc_path = builder.document_path
doc_text = IO.read(doc_path)
#p doc_text
assert(doc_text =~ /Retpahc/ && doc_text !~ /Chapter/)
assert(doc_text =~ /<h2>/ && doc_text !~ /<h1>/)
assert(doc_text =~ /<\/h2>/ && doc_text !~ /<\/h1>/)
Expand All @@ -61,7 +60,6 @@ def test_custom_css
builder = build(parse(fetch))
doc_path = builder.document_path
doc_text = IO.read(doc_path)
#p doc_text
doc = Nokogiri::HTML.parse(doc_text, nil, 'UTF-8')
links = doc.xpath('//head/link[@rel="stylesheet"]')
# we have single link
Expand All @@ -78,7 +76,6 @@ def test_removing_styles
builder = build(parse(fetch))
doc_path = builder.document_path
doc_text = IO.read(doc_path)
p doc_text
doc = Nokogiri::HTML.parse(doc_text, nil, 'UTF-8')
links = doc.xpath('//head/link[@rel="stylesheet"]')
# no stylesheet links
Expand All @@ -88,18 +85,68 @@ def test_removing_styles
assert_equal(0, styles.size)
end

# Returns the closest following sibling of +el+ that is not a text node.
#
# Always advances at least one sibling, then keeps advancing while the
# current node answers true to +text?+. Returns nil when the sibling list
# is exhausted (i.e. +next_sibling+ returned nil) instead of raising
# NoMethodError on nil.
def next_nontext_sibling(el)
  el = el.next_sibling
  # Guard against nil so a node followed only by text nodes (or by nothing) yields nil.
  el = el.next_sibling while el && el.text?
  el
end

# Returns the closest preceding sibling of +el+ that is not a text node.
#
# Always steps back at least one sibling, then keeps stepping back while the
# current node answers true to +text?+. Returns nil when the sibling list
# is exhausted (i.e. +previous_sibling+ returned nil) instead of raising
# NoMethodError on nil.
def previous_nontext_sibling(el)
  el = el.previous_sibling
  # Guard against nil so a node preceded only by text nodes (or by nothing) yields nil.
  el = el.previous_sibling while el && el.text?
  el
end

# Verifies the :after option: each fragment's top-level nodes must show up,
# in their original order, right after the first element matching the selector
# (ignoring text nodes in between).
def test_inserting_elements_after
  selector1 = '//ul'
  fragment1 = Nokogiri::HTML.fragment('<p>blah</p>')
  selector2 = '//p[last()]'
  fragment2 = Nokogiri::HTML.fragment('<span>bleh</span><div>boo</div>')
  # Hand clones to the builder so the local fragments remain available for comparison
  # (presumably inserting a fragment's children into the document moves them out of it).
  @options[:after] = [{ selector1 => fragment1.clone }, { selector2 => fragment2.clone }]
  builder = build(parse(fetch))
  doc_path = builder.document_path
  doc_text = IO.read(doc_path)
  doc = Nokogiri::HTML.parse(doc_text, nil, 'UTF-8')
  # single-node fragment
  el = next_nontext_sibling(doc.at(selector1))
  assert_equal(fragment1.children[0].to_s.strip, el.to_s.strip)
  # first fragment node
  el = next_nontext_sibling(doc.at(selector2))
  assert_equal(fragment2.children[0].to_s.strip, el.to_s.strip)
  # second fragment node
  el = next_nontext_sibling(el)
  assert_equal(fragment2.children[1].to_s.strip, el.to_s.strip)
end

# Verifies the :before option: each fragment's top-level nodes must show up
# right before the first element matching the selector (ignoring text nodes
# in between).
def test_inserting_elements_before
  anchor_xpath = '//a[@id="c11"]'
  anchor_fragment = Nokogiri::HTML.fragment('<h4>blah</h4><div>boo</div>')
  para_xpath = '//p[position()=5]'
  para_fragment = Nokogiri::HTML.fragment('<div>test</div>')
  @options[:before] = [
    { anchor_xpath => anchor_fragment.clone },
    { para_xpath => para_fragment.clone }
  ]

  html = IO.read(build(parse(fetch)).document_path)
  doc = Nokogiri::HTML.parse(html, nil, 'UTF-8')

  # Walking backwards from the anchor visits the fragment's nodes last-to-first:
  # children[1] is the nearest previous sibling, children[0] the one before it.
  node = doc.at(anchor_xpath)
  [1, 0].each do |index|
    node = previous_nontext_sibling(node)
    assert_equal(anchor_fragment.children[index].to_s.strip, node.to_s.strip)
  end

  # Single-node fragment in front of the fifth paragraph.
  node = previous_nontext_sibling(doc.at(para_xpath))
  assert_equal(para_fragment.children[0].to_s.strip, node.to_s.strip)
end

# Verifies the :remove option: after building, none of the configured
# selectors may match anything in the processed document.
def test_remove_elements
  @options[:remove] = ['ul', '//a[@id="c2"]', 'div[@class="img"]']

  html = IO.read(build(parse(fetch)).document_path)
  doc = Nokogiri::HTML.parse(html, nil, 'UTF-8')

  @options[:remove].each do |xpath|
    matches = doc.xpath(xpath)
    assert_equal(0, matches.size)
  end
end
end
2 changes: 1 addition & 1 deletion test/test_parser.rb
Expand Up @@ -39,7 +39,7 @@ def test_parser
assert_equal(2, parser.toc[0].subitems.size)
assert_equal('Chapter 1.2', parser.toc[0].subitems[1].title)
assert_equal(cache.assets[:documents][0], parser.toc[0].subitems[1].uri)
assert_equal('12', parser.toc[0].subitems[1].fragment_id)
assert_equal('c12', parser.toc[0].subitems[1].fragment_id)
end

end

0 comments on commit 9df340b

Please sign in to comment.