Skip to content

Commit

Permalink
Call IO.popen with an Array of command arguments (#518). (#519)
Browse files Browse the repository at this point in the history
* By calling `IO.popen` with an Array of command arguments
  (ex: `['ls', '-l', ...]`) it runs the command as a separate process
  instead of running it in a sub-shell as a shell command. This prevents
  any arbitrary command injection or env variable interpolation, without
  needing complex shell-escaping logic.
  https://ruby-doc.org/core-3.1.2/IO.html#method-c-popen
* Changed `Configuration#executable` to return a String or an Array for
  when xvfb mode is enabled.
* Changed `PDFKit#command` to return an Array of command arguments for
  `IO.popen`.
* Removed argument quoting logic as it's not necessary when calling
  `IO.popen` with an Array of arguments.
* Rewrote some specs to test if the command's Array of arguments contains
  specific argument values.
* Added a custom RSpec `contain` matcher for testing if an expected Array
  exists within another Array.
  • Loading branch information
postmodern committed Oct 18, 2022
1 parent ceca488 commit 79ec0c0
Show file tree
Hide file tree
Showing 7 changed files with 63 additions and 59 deletions.
2 changes: 1 addition & 1 deletion lib/pdfkit/configuration.rb
Expand Up @@ -45,7 +45,7 @@ def wkhtmltopdf=(path)
end

def executable
using_xvfb? ? "xvfb-run #{wkhtmltopdf}" : wkhtmltopdf
using_xvfb? ? ['xvfb-run', wkhtmltopdf] : wkhtmltopdf
end

def using_xvfb?
Expand Down
15 changes: 5 additions & 10 deletions lib/pdfkit/pdfkit.rb
Expand Up @@ -46,16 +46,11 @@ def initialize(url_file_or_html, options = {})
end

def command(path = nil)
args = @renderer.options_for_command
shell_escaped_command = [executable, OS::shell_escape_for_os(args)].join ' '

# In order to allow for URL parameters (e.g. https://www.google.com/search?q=pdfkit) we do
# not escape the source. The user is responsible for ensuring that no vulnerabilities exist
# in the source. Please see https://github.com/pdfkit/pdfkit/issues/164.
input_for_command = @source.to_input_for_command
output_for_command = path ? Shellwords.shellescape(path) : '-'

"#{shell_escaped_command} #{input_for_command} #{output_for_command}"
args = [*executable]
args.concat(@renderer.options_for_command)
args << @source.to_input_for_command
args << (path ? path : '-')
args
end

def options
Expand Down
4 changes: 2 additions & 2 deletions lib/pdfkit/source.rb
Expand Up @@ -29,7 +29,7 @@ def to_input_for_command
if file?
@source.path
elsif url?
%{"#{shell_safe_url}"}
escaped_url
else
SOURCE_FROM_STDIN
end
Expand All @@ -41,7 +41,7 @@ def to_s

private

def shell_safe_url
def escaped_url
url_needs_escaping? ? URI::DEFAULT_PARSER.escape(@source) : @source
end

Expand Down
2 changes: 1 addition & 1 deletion lib/pdfkit/wkhtmltopdf.rb
Expand Up @@ -64,7 +64,7 @@ def normalize_value(value)
when Array
value.flatten.collect{|x| x.to_s}
else
(OS::host_is_windows? && value.to_s.index(' ')) ? "'#{ value.to_s }'" : value.to_s
value.to_s
end
end

Expand Down
74 changes: 39 additions & 35 deletions spec/pdfkit_spec.rb
Expand Up @@ -175,22 +175,22 @@
it "constructs the correct command" do
pdfkit = PDFKit.new('html', :page_size => 'Letter', :toc_l1_font_size => 12, :replace => {'foo' => 'bar'})
command = pdfkit.command
expect(command).to include "wkhtmltopdf"
expect(command).to include "--page-size Letter"
expect(command).to include "--toc-l1-font-size 12"
expect(command).to include "--replace foo bar"
expect(command.first).to match(/wkhtmltopdf/)
expect(command).to contain %w[--page-size Letter]
expect(command).to contain %w[--toc-l1-font-size 12]
expect(command).to contain %w[--replace foo bar]
end

it "sets up one cookie when hash has only one cookie" do
pdfkit = PDFKit.new('html', cookie: {cookie_name: :cookie_value})
command = pdfkit.command
expect(command).to include "--cookie cookie_name cookie_value"
expect(command).to contain %w[--cookie cookie_name cookie_value]
end

it "does not break Windows paths" do
it "does not split Windows paths that contain spaces" do
pdfkit = PDFKit.new('html')
allow(PDFKit.configuration).to receive(:wkhtmltopdf).and_return 'c:/Program Files/wkhtmltopdf/wkhtmltopdf.exe'
expect(pdfkit.command).not_to include('Program\ Files')
expect(pdfkit.command).not_to contain(%w[c:/Program Files/wkhtmltopdf/wkhtmltopdf.exe])
end

it "does not shell escape source URLs" do
Expand All @@ -207,15 +207,15 @@
it "sets up multiple cookies when passed multiple cookies" do
pdfkit = PDFKit.new('html', :cookie => {:cookie_name1 => :cookie_val1, :cookie_name2 => :cookie_val2})
command = pdfkit.command
expect(command).to include "--cookie cookie_name1 cookie_val1"
expect(command).to include "--cookie cookie_name2 cookie_val2"
expect(command).to contain %w[--cookie cookie_name1 cookie_val1]
expect(command).to contain %w[--cookie cookie_name2 cookie_val2]
end

it "sets up multiple cookies when passed an array of tuples" do
pdfkit = PDFKit.new('html', :cookie => [[:cookie_name1, :cookie_val1], [:cookie_name2, :cookie_val2]])
command = pdfkit.command
expect(command).to include "--cookie cookie_name1 cookie_val1"
expect(command).to include "--cookie cookie_name2 cookie_val2"
expect(command).to contain %w[--cookie cookie_name1 cookie_val1]
expect(command).to contain %w[--cookie cookie_name2 cookie_val2]
end

it "will not include default options it is told to omit" do
Expand All @@ -229,48 +229,57 @@
expect(pdfkit.command).not_to include('--disable-smart-shrinking')
end

it "encapsulates string arguments in quotes" do
it "must not split string arguments containing spaces" do
pdfkit = PDFKit.new('html', :header_center => "foo [page]")
expect(pdfkit.command).to include "--header-center foo\\ \\[page\\]"
expect(pdfkit.command).to contain ['--header-center', 'foo [page]']
end

it "sanitizes string arguments" do
it "paramatarizes string arguments" do
pdfkit = PDFKit.new('html', :header_center => "$(ls)")
expect(pdfkit.command).to include "--header-center \\$\\(ls\\)"
expect(pdfkit.command).to contain %w[--header-center $(ls)]
end

it "read the source from stdin if it is html" do
pdfkit = PDFKit.new('html')
expect(pdfkit.command).to match(/- -$/)
command = pdfkit.command
expect(command[-2]).to eq('-')
expect(command[-1]).to eq('-')
end

it "specifies the URL to the source if it is a url" do
pdfkit = PDFKit.new('http://google.com')
expect(pdfkit.command).to match(/"http:\/\/google.com" -$/)
command = pdfkit.command
expect(command[-2]).to eq("http://google.com")
expect(command[-1]).to eq("-")
end

it "does not break Windows paths" do
pdfkit = PDFKit.new('html')
allow(PDFKit.configuration).to receive(:wkhtmltopdf).and_return 'c:/Program Files/wkhtmltopdf/wkhtmltopdf.exe'
expect(pdfkit.command).not_to include('Program\ Files')
expect(pdfkit.command).not_to contain ['Program', 'Files']
end

it "specifies the path to the source if it is a file" do
file_path = File.join(SPEC_ROOT,'fixtures','example.html')
pdfkit = PDFKit.new(File.new(file_path))
expect(pdfkit.command).to match(/#{file_path} -$/)
command = pdfkit.command
expect(command[-2]).to eq(file_path)
expect(command[-1]).to eq('-')
end

it "specifies the path to the source if it is a tempfile" do
file_path = File.join(SPEC_ROOT,'fixtures','example.html')
pdfkit = PDFKit.new(Tempfile.new(file_path))
expect(pdfkit.command).to match(/#{Dir.tmpdir}\S+ -$/)
command = pdfkit.command
expect(command[-2]).to start_with(Dir.tmpdir)
expect(command[-1]).to eq('-')
end

it "specifies the path for the ouput if a path is given" do
file_path = "/path/to/output.pdf"
pdfkit = PDFKit.new("html")
expect(pdfkit.command(file_path)).to match(/#{file_path}$/)
command = pdfkit.command(file_path)
expect(command.last).to eq(file_path)
end

it "detects special pdfkit meta tags" do
Expand All @@ -284,8 +293,8 @@
}
pdfkit = PDFKit.new(body)
command = pdfkit.command
expect(command).to include "--page-size Legal"
expect(command).to include "--orientation Landscape"
expect(command).to contain %w[--page-size Legal]
expect(command).to contain %w[--orientation Landscape]
end

it "detects cookies meta tag" do
Expand All @@ -299,7 +308,7 @@
}
pdfkit = PDFKit.new(body)
command = pdfkit.command
expect(command).to include "--cookie rails_session rails_session_value --cookie cookie_variable cookie_variable_value"
expect(command).to contain %w[--cookie rails_session rails_session_value --cookie cookie_variable cookie_variable_value]
end

it "detects disable_smart_shrinking meta tag" do
Expand All @@ -313,7 +322,7 @@
pdfkit = PDFKit.new(body)
command = pdfkit.command
expect(command).to include "--disable-smart-shrinking"
expect(command).not_to include "--disable-smart-shrinking true"
expect(command).not_to contain %w[--disable-smart-shrinking true]
end

it "detects names with hyphens instead of underscores" do
Expand Down Expand Up @@ -342,8 +351,8 @@
}
pdfkit = PDFKit.new(body)
command = pdfkit.command
expect(command).to include "--page-size Legal"
expect(command).to include "--orientation Landscape"
expect(command).to contain %w[--page-size Legal]
expect(command).to contain %w[--orientation Landscape]
end

it "skips non-pdfkit meta tags" do
Expand All @@ -358,8 +367,8 @@
}
pdfkit = PDFKit.new(body)
command = pdfkit.command
expect(command).not_to include "--page-size Legal"
expect(command).to include "--orientation Landscape"
expect(command).not_to contain %w[--page-size Legal]
expect(command).to contain %w[--orientation Landscape]
end

it "does not use quiet when told to" do
Expand Down Expand Up @@ -422,14 +431,9 @@
allow(PDFKit::OS).to receive(:host_is_windows?).and_return(true)
end

it "escapes special windows characters" do
pdf = PDFKit.new('html', :title => 'hello(world)')
expect(pdf.command).to include 'hello^(world^)'
end

it "quotes spaces in options" do
pdf = PDFKit.new('html', :title => 'hello world')
expect(pdf.command).to include "--title 'hello world'"
expect(pdf.command).to contain ['--title', "hello world"]
end
end
end
Expand Down
15 changes: 5 additions & 10 deletions spec/source_spec.rb
Expand Up @@ -75,19 +75,14 @@
end

describe "#to_input_for_command" do
it "URI escapes source URLs and encloses them in quotes to accomodate ampersands" do
source = PDFKit::Source.new("https://www.google.com/search?q='cat<dev/zero>/dev/null'")
expect(source.to_input_for_command).to eq "\"https://www.google.com/search?q='cat%3Cdev/zero%3E/dev/null'\""
end

it "URI escapes source URI only escape part of it" do
source = PDFKit::Source.new("https://www.google.com/search?q='%20 sleep 5'")
expect(source.to_input_for_command).to eq "\"https://www.google.com/search?q='%2520%20sleep%205'\""
it "URI escapes source URI" do
source = PDFKit::Source.new("https://www.google.com/search?q=foo bar")
expect(source.to_input_for_command).to eq "https://www.google.com/search?q=foo%20bar"
end

it "does not URI escape previously escaped source URLs" do
source = PDFKit::Source.new("https://www.google.com/search?q='cat%3Cdev/zero%3E/dev/null'")
expect(source.to_input_for_command).to eq "\"https://www.google.com/search?q='cat%3Cdev/zero%3E/dev/null'\""
source = PDFKit::Source.new("https://www.google.com/search?q=foo%20bar")
expect(source.to_input_for_command).to eq "https://www.google.com/search?q=foo%20bar"
end

it "returns a '-' for HTML strings to indicate that we send that content through STDIN" do
Expand Down
10 changes: 10 additions & 0 deletions spec/spec_helper.rb
Expand Up @@ -21,3 +21,13 @@
RSpec.configure do |config|
include Rack::Test::Methods
end

RSpec::Matchers.define :contain do |expected|
match do |actual|
(0..(actual.length - expected.length)).any? do |base_index|
expected.each_with_index.all? do |expected_element,index|
actual[base_index+index] == expected_element
end
end
end
end

0 comments on commit 79ec0c0

Please sign in to comment.