Skip to content

Commit

Permalink
More robust meta tag detection.
Browse files Browse the repository at this point in the history
Meta tag detection used REXML. While this gives all the power of parsing
and working with XML, it is very strict about the input it accepts. On
bad markup it simply threw in the towel and gave up on parsing.

As the detection of meta tags should not depend on bad (HTML) markup
much later in the file, the parsing has now been ported to use regular
expressions.
  • Loading branch information
huerlisi committed Jan 10, 2011
1 parent 5cf15d6 commit c7149a2
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 14 deletions.
23 changes: 9 additions & 14 deletions lib/pdfkit/pdfkit.rb
Expand Up @@ -79,23 +79,18 @@ def to_file(path)

protected

def find_options_in_meta(body)
pdfkit_meta_tags(body).inject({}) do |found, tag|
name = tag.attributes["name"].sub(/^#{PDFKit.configuration.meta_tag_prefix}/, '').to_sym
found.merge(name => tag.attributes["content"])
# Extracts PDFKit options declared in <meta> tags of an HTML document.
#
# Tags are located with regular expressions rather than an XML parser, so
# malformed markup elsewhere in the document cannot prevent detection.
#
# @param content [String, #read] HTML source, or a readable object (File,
#   StringIO, ...) whose #read returns the HTML source
# @return [Hash] option name (configured meta tag prefix stripped, as a
#   String) mapped to the tag's content attribute, or nil when the tag has
#   no content attribute
def find_options_in_meta(content)
  # Accept any readable source, not only File instances.
  content = content.read if content.respond_to?(:read)

  # Escape the prefix so it is matched literally inside the pattern.
  prefix = Regexp.escape(PDFKit.configuration.meta_tag_prefix.to_s)
  name_pattern = /name=["']#{prefix}([^"']*)/
  content_pattern = /content=["']([^"']*)/

  found = {}
  content.scan(/<meta [^>]*>/) do |meta|
    name = meta[name_pattern, 1]
    # Meta tags without a prefixed name (charset, viewport, ...) are not ours.
    next unless name

    found[name] = meta[content_pattern, 1]
  end

  # Return the collected options explicitly; String#scan with a block
  # returns its receiver, not the hash.
  found
end

# Collects the html/head/meta elements of +body+ whose name attribute starts
# with the configured PDFKit meta tag prefix.
#
# The document is parsed with REXML; if parsing (or anything else in the
# method) raises a StandardError, an empty array is returned instead.
def pdfkit_meta_tags(body)
  require 'rexml/document'
  head_metas = REXML::Document.new(body).elements.to_a("html/head/meta")
  head_metas.select do |meta|
    meta.attributes['name'].to_s =~ /^#{PDFKit.configuration.meta_tag_prefix}/
  end
rescue # rexml raises on invalid xml; treat that as "no tags found"
  []
end

def style_tag_for(stylesheet)
Expand Down
16 changes: 16 additions & 0 deletions spec/pdfkit_spec.rb
Expand Up @@ -105,6 +105,22 @@
pdfkit.command[pdfkit.command.index('"--page-size"') + 1].should == '"Legal"'
pdfkit.command[pdfkit.command.index('"--orientation"') + 1].should == '"Landscape"'
end

# The unclosed <br> below makes the document invalid XML; the meta tags in
# <head> must still be picked up.
it "should detect special pdfkit meta tags despite bad markup" do
  markup = %{
<html>
<head>
<meta name="pdfkit-page_size" content="Legal"/>
<meta name="pdfkit-orientation" content="Landscape"/>
</head>
<br>
</html>
}
  kit = PDFKit.new(markup)
  # Looks up the command element that directly follows the given flag.
  value_after = lambda { |flag| kit.command[kit.command.index(flag) + 1] }

  value_after.call('"--page-size"').should == '"Legal"'
  value_after.call('"--orientation"').should == '"Landscape"'
end

end

context "#to_pdf" do
Expand Down

0 comments on commit c7149a2

Please sign in to comment.