Skip to content

Commit

Permalink
Merge pull request #392 from welguisz/option-off-html-strings
Browse files Browse the repository at this point in the history
Disable html injection
  • Loading branch information
Empact committed Dec 1, 2017
2 parents 85c2736 + ae2cb13 commit 20d7810
Show file tree
Hide file tree
Showing 5 changed files with 80 additions and 33 deletions.
6 changes: 4 additions & 2 deletions lib/roo/excelx.rb
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,9 @@ def initialize(filename_or_stream, options = {})
sheet_options = {}
sheet_options[:expand_merged_ranges] = (options[:expand_merged_ranges] || false)
sheet_options[:no_hyperlinks] = (options[:no_hyperlinks] || false)

shared_options = {}

shared_options[:disable_html_wrapper] = (options[:disable_html_wrapper] || false)
unless is_stream?(filename_or_stream)
file_type_check(filename_or_stream, %w[.xlsx .xlsm], 'an Excel 2007', file_warning, packed)
basename = find_basename(filename_or_stream)
Expand All @@ -52,7 +54,7 @@ def initialize(filename_or_stream, options = {})
@tmpdir = self.class.make_tempdir(self, basename, options[:tmpdir_root])
ObjectSpace.define_finalizer(self, self.class.finalize(object_id))

@shared = Shared.new(@tmpdir)
@shared = Shared.new(@tmpdir, shared_options)
@filename = local_filename(filename_or_stream, @tmpdir, packed)
process_zipfile(@filename || filename_or_stream)

Expand Down
3 changes: 2 additions & 1 deletion lib/roo/excelx/extractor.rb
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
module Roo
class Excelx
class Extractor
def initialize(path)
def initialize(path, options = {})
@path = path
@options = options
end

private
Expand Down
5 changes: 3 additions & 2 deletions lib/roo/excelx/shared.rb
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,20 @@ class Excelx
# to various initializers.
class Shared
attr_accessor :comments_files, :sheet_files, :rels_files
def initialize(dir)
def initialize(dir, options = {})
@dir = dir
@comments_files = []
@sheet_files = []
@rels_files = []
@options = options
end

# Returns the Styles object for this workbook, creating it on first use
# from 'roo_styles.xml' inside @dir and reusing it on later calls.
def styles
  return @styles if @styles

  styles_path = File.join(@dir, 'roo_styles.xml')
  @styles = Styles.new(styles_path)
end

# Returns the SharedStrings reader for this workbook, creating it on first
# use from 'roo_sharedStrings.xml' inside @dir and reusing it afterwards.
#
# @options is forwarded to SharedStrings so that option flags given to this
# object (e.g. :disable_html_wrapper) are visible to it. A memoizing call
# without the options must not run first, or the options are never applied.
def shared_strings
  @shared_strings ||= SharedStrings.new(File.join(@dir, 'roo_sharedStrings.xml'), @options)
end

def workbook
Expand Down
1 change: 1 addition & 0 deletions lib/roo/excelx/shared_strings.rb
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ def to_html
# Use to_html or to_a for html returns
# See what is happening with commit???
# Decides whether the shared string at +index+ should be treated as HTML.
#
# Returns false when the :disable_html_wrapper option is set. Otherwise
# returns the first formatting tag (<b>, <i>, <u>, <sup> or <sub>) found in
# the HTML rendering of that string, or nil when there is none.
def use_html?(index)
  if @options[:disable_html_wrapper]
    false
  else
    html = to_html[index]
    html[/<([biu]|sup|sub)>/]
  end
end

Expand Down
98 changes: 70 additions & 28 deletions spec/lib/roo/excelx_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -480,34 +480,36 @@
end

describe '#html_strings' do
let(:path) { 'test/files/html_strings_formatting.xlsx' }

it 'returns the expected result' do
expect(subject.excelx_value(1, 1, "Sheet1")).to eq "This has no formatting."
expect(subject.excelx_value(2, 1, "Sheet1")).to eq "<html>This has<b> bold </b>formatting.</html>"
expect(subject.excelx_value(2, 2, "Sheet1")).to eq "<html>This has <i>italics</i> formatting.</html>"
expect(subject.excelx_value(2, 3, "Sheet1")).to eq "<html>This has <u>underline</u> format.</html>"
expect(subject.excelx_value(2, 4, "Sheet1")).to eq "<html>Superscript. x<sup>123</sup></html>"
expect(subject.excelx_value(2, 5, "Sheet1")).to eq "<html>SubScript. T<sub>j</sub></html>"

expect(subject.excelx_value(3, 1, "Sheet1")).to eq "<html>Bold, italics <b><i>together</i></b>.</html>"
expect(subject.excelx_value(3, 2, "Sheet1")).to eq "<html>Bold, Underline <b><u>together</u></b>.</html>"
expect(subject.excelx_value(3, 3, "Sheet1")).to eq "<html>Bold, Superscript. <b>x</b><sup><b>N</b></sup></html>"
expect(subject.excelx_value(3, 4, "Sheet1")).to eq "<html>Bold, Subscript. <b>T</b><sub><b>abc</b></sub></html>"
expect(subject.excelx_value(3, 5, "Sheet1")).to eq "<html>Italics, Underline <i><u>together</u></i>.</html>"
expect(subject.excelx_value(3, 6, "Sheet1")).to eq "<html>Italics, Superscript. <i>X</i><sup><i>abc</i></sup></html>"
expect(subject.excelx_value(3, 7, "Sheet1")).to eq "<html>Italics, Subscript. <i>B</i><sub><i>efg</i></sub></html>"
expect(subject.excelx_value(4, 1, "Sheet1")).to eq "<html>Bold, italics underline,<b><i><u> together</u></i></b>.</html>"
expect(subject.excelx_value(4, 2, "Sheet1")).to eq "<html>Bold, italics, superscript. <b>X</b><sup><b><i>abc</i></b></sup><b><i>123</i></b></html>"
expect(subject.excelx_value(4, 3, "Sheet1")).to eq "<html>Bold, Italics, subscript. <b><i>Mg</i></b><sub><b><i>ha</i></b></sub><b><i>2</i></b></html>"
expect(subject.excelx_value(4, 4, "Sheet1")).to eq "<html>Bold, Underline, superscript. <b><u>AB</u></b><sup><b><u>C12</u></b></sup><b><u>3</u></b></html>"
expect(subject.excelx_value(4, 5, "Sheet1")).to eq "<html>Bold, Underline, subscript. <b><u>Good</u></b><sub><b><u>XYZ</u></b></sub></html>"
expect(subject.excelx_value(4, 6, "Sheet1")).to eq "<html>Italics, Underline, superscript. <i><u>Up</u></i><sup><i><u>swing</u></i></sup></html>"
expect(subject.excelx_value(4, 7, "Sheet1")).to eq "<html>Italics, Underline, subscript. <i><u>T</u></i><sub><i><u>swing</u></i></sub></html>"
expect(subject.excelx_value(5, 1, "Sheet1")).to eq "<html>Bold, italics, underline, superscript. <b><i><u>GHJK</u></i></b><sup><b><i><u>190</u></i></b></sup><b><i><u>4</u></i></b></html>"
expect(subject.excelx_value(5, 2, "Sheet1")).to eq "<html>Bold, italics, underline, subscript. <b><i><u>Mike</u></i></b><sub><b><i><u>drop</u></i></b></sub></html>"
expect(subject.excelx_value(6, 1, "Sheet1")).to eq "See that regular html tags do not create html tags.\n<ol>\n <li> Denver Broncos </li>\n <li> Carolina Panthers </li>\n <li> New England Patriots</li>\n <li>Arizona Panthers</li>\n</ol>"
expect(subject.excelx_value(7, 1, "Sheet1")).to eq "<html>Does create html tags when formatting is used..\n<ol>\n <li> <b>Denver Broncos</b> </li>\n <li> <i>Carolina Panthers </i></li>\n <li> <u>New England Patriots</u></li>\n <li>Arizona Panthers</li>\n</ol></html>"
describe "HTML Parsing Enabling" do
let(:path) { 'test/files/html_strings_formatting.xlsx' }

it 'returns the expected result' do
expect(subject.excelx_value(1, 1, "Sheet1")).to eq("This has no formatting.")
expect(subject.excelx_value(2, 1, "Sheet1")).to eq("<html>This has<b> bold </b>formatting.</html>")
expect(subject.excelx_value(2, 2, "Sheet1")).to eq("<html>This has <i>italics</i> formatting.</html>")
expect(subject.excelx_value(2, 3, "Sheet1")).to eq("<html>This has <u>underline</u> format.</html>")
expect(subject.excelx_value(2, 4, "Sheet1")).to eq("<html>Superscript. x<sup>123</sup></html>")
expect(subject.excelx_value(2, 5, "Sheet1")).to eq("<html>SubScript. T<sub>j</sub></html>")

expect(subject.excelx_value(3, 1, "Sheet1")).to eq("<html>Bold, italics <b><i>together</i></b>.</html>")
expect(subject.excelx_value(3, 2, "Sheet1")).to eq("<html>Bold, Underline <b><u>together</u></b>.</html>")
expect(subject.excelx_value(3, 3, "Sheet1")).to eq("<html>Bold, Superscript. <b>x</b><sup><b>N</b></sup></html>")
expect(subject.excelx_value(3, 4, "Sheet1")).to eq("<html>Bold, Subscript. <b>T</b><sub><b>abc</b></sub></html>")
expect(subject.excelx_value(3, 5, "Sheet1")).to eq("<html>Italics, Underline <i><u>together</u></i>.</html>")
expect(subject.excelx_value(3, 6, "Sheet1")).to eq("<html>Italics, Superscript. <i>X</i><sup><i>abc</i></sup></html>")
expect(subject.excelx_value(3, 7, "Sheet1")).to eq("<html>Italics, Subscript. <i>B</i><sub><i>efg</i></sub></html>")
expect(subject.excelx_value(4, 1, "Sheet1")).to eq("<html>Bold, italics underline,<b><i><u> together</u></i></b>.</html>")
expect(subject.excelx_value(4, 2, "Sheet1")).to eq("<html>Bold, italics, superscript. <b>X</b><sup><b><i>abc</i></b></sup><b><i>123</i></b></html>")
expect(subject.excelx_value(4, 3, "Sheet1")).to eq("<html>Bold, Italics, subscript. <b><i>Mg</i></b><sub><b><i>ha</i></b></sub><b><i>2</i></b></html>")
expect(subject.excelx_value(4, 4, "Sheet1")).to eq("<html>Bold, Underline, superscript. <b><u>AB</u></b><sup><b><u>C12</u></b></sup><b><u>3</u></b></html>")
expect(subject.excelx_value(4, 5, "Sheet1")).to eq("<html>Bold, Underline, subscript. <b><u>Good</u></b><sub><b><u>XYZ</u></b></sub></html>")
expect(subject.excelx_value(4, 6, "Sheet1")).to eq("<html>Italics, Underline, superscript. <i><u>Up</u></i><sup><i><u>swing</u></i></sup></html>")
expect(subject.excelx_value(4, 7, "Sheet1")).to eq("<html>Italics, Underline, subscript. <i><u>T</u></i><sub><i><u>swing</u></i></sub></html>")
expect(subject.excelx_value(5, 1, "Sheet1")).to eq("<html>Bold, italics, underline, superscript. <b><i><u>GHJK</u></i></b><sup><b><i><u>190</u></i></b></sup><b><i><u>4</u></i></b></html>")
expect(subject.excelx_value(5, 2, "Sheet1")).to eq("<html>Bold, italics, underline, subscript. <b><i><u>Mike</u></i></b><sub><b><i><u>drop</u></i></b></sub></html>")
expect(subject.excelx_value(6, 1, "Sheet1")).to eq("See that regular html tags do not create html tags.\n<ol>\n <li> Denver Broncos </li>\n <li> Carolina Panthers </li>\n <li> New England Patriots</li>\n <li>Arizona Panthers</li>\n</ol>")
expect(subject.excelx_value(7, 1, "Sheet1")).to eq("<html>Does create html tags when formatting is used..\n<ol>\n <li> <b>Denver Broncos</b> </li>\n <li> <i>Carolina Panthers </i></li>\n <li> <u>New England Patriots</u></li>\n <li>Arizona Panthers</li>\n</ol></html>")
end
end
end

Expand Down Expand Up @@ -535,3 +537,43 @@
end
end
end

# Exercises Roo::Excelx opened with disable_html_wrapper: true. Cells that
# carry rich-text formatting are expected to come back as plain strings
# without the <html> wrapper or formatting tags.
describe 'Roo::Excelx with options set' do
  subject(:xlsx) do
    Roo::Excelx.new(path, disable_html_wrapper: true)
  end

  describe '#html_strings' do
    describe "HTML Parsing Disabled" do
      let(:path) { 'test/files/html_strings_formatting.xlsx' }

      # Expected cell text keyed by [row, column] on "Sheet1", checked in
      # this order.
      let(:expected_cells) do
        {
          [1, 1] => "This has no formatting.",
          [2, 1] => "This has bold formatting.",
          [2, 2] => "This has italics formatting.",
          [2, 3] => "This has underline format.",
          [2, 4] => "Superscript. x123",
          [2, 5] => "SubScript. Tj",

          [3, 1] => "Bold, italics together.",
          [3, 2] => "Bold, Underline together.",
          [3, 3] => "Bold, Superscript. xN",
          [3, 4] => "Bold, Subscript. Tabc",
          [3, 5] => "Italics, Underline together.",
          [3, 6] => "Italics, Superscript. Xabc",
          [3, 7] => "Italics, Subscript. Befg",
          [4, 1] => "Bold, italics underline, together.",
          [4, 2] => "Bold, italics, superscript. Xabc123",
          [4, 3] => "Bold, Italics, subscript. Mgha2",
          [4, 4] => "Bold, Underline, superscript. ABC123",
          [4, 5] => "Bold, Underline, subscript. GoodXYZ",
          [4, 6] => "Italics, Underline, superscript. Upswing",
          [4, 7] => "Italics, Underline, subscript. Tswing",
          [5, 1] => "Bold, italics, underline, superscript. GHJK1904",
          [5, 2] => "Bold, italics, underline, subscript. Mikedrop",
          # Literal markup typed into a cell is cell text and is left as is.
          [6, 1] => "See that regular html tags do not create html tags.\n<ol>\n <li> Denver Broncos </li>\n <li> Carolina Panthers </li>\n <li> New England Patriots</li>\n <li>Arizona Panthers</li>\n</ol>",
          [7, 1] => "Does create html tags when formatting is used..\n<ol>\n <li> Denver Broncos </li>\n <li> Carolina Panthers </li>\n <li> New England Patriots</li>\n <li>Arizona Panthers</li>\n</ol>"
        }
      end

      it 'returns the expected result' do
        expected_cells.each do |(row, col), text|
          expect(xlsx.excelx_value(row, col, "Sheet1")).to eq(text)
        end
      end
    end
  end
end

0 comments on commit 20d7810

Please sign in to comment.