diff --git a/CHANGELOG.md b/CHANGELOG.md index 56347204..dbed017f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,9 @@ - Roo::Base::TEMP_PREFIX should be accessed via Roo::TEMP_PREFIX - The private Roo::Base#make_tempdir is now available at the class level in classes that use tempdirs, added via Roo::Tempdir +======= +### Added +- Discard hyperlink lookups to allow streaming parsing without loading whole files ## [2.4.0] 2016-05-14 ### Fixed diff --git a/lib/roo/excelx.rb b/lib/roo/excelx.rb index d39562f4..908c9c5e 100644 --- a/lib/roo/excelx.rb +++ b/lib/roo/excelx.rb @@ -39,6 +39,7 @@ def initialize(filename_or_stream, options = {}) cell_max = options.delete(:cell_max) sheet_options = {} sheet_options[:expand_merged_ranges] = (options[:expand_merged_ranges] || false) + sheet_options[:no_hyperlinks] = (options[:no_hyperlinks] || false) unless is_stream?(filename_or_stream) file_type_check(filename_or_stream, %w[.xlsx .xlsm], 'an Excel 2007', file_warning, packed) diff --git a/lib/roo/excelx/sheet_doc.rb b/lib/roo/excelx/sheet_doc.rb index 25e38d9e..a705958c 100755 --- a/lib/roo/excelx/sheet_doc.rb +++ b/lib/roo/excelx/sheet_doc.rb @@ -39,8 +39,13 @@ def each_row_streaming(&block) def each_cell(row_xml) return [] unless row_xml row_xml.children.each do |cell_element| - key = ::Roo::Utils.ref_to_key(cell_element['r']) - yield cell_from_xml(cell_element, hyperlinks(@relationships)[key]) + # If you're sure you're not going to need these hyperlinks you can discard them + hyperlinks = unless @options[:no_hyperlinks] + key = ::Roo::Utils.ref_to_key(cell_element['r']) + hyperlinks(@relationships)[key] + end + + yield cell_from_xml(cell_element, hyperlinks) end end