# Parses every header part found in the opened archive and stores the
# results in @headers.
#
# Each zip entry matching "word/header*.xml" is read through @zip and parsed
# with Nokogiri::XML. The resulting hash is keyed by the entry name with the
# leading "word/" directory and the trailing ".xml" extension removed
# (e.g. "word/header1.xml" => "header1"). Keys appear in the order in which
# @zip.glob yields the entries. When no entry matches, @headers is an empty
# hash.
#
# @return [Hash] the hash assigned to @headers (name => parsed document)
def load_headers
  header_names = @zip.glob('word/header*.xml').map(&:name)

  @headers = header_names.each_with_object({}) do |entry_name, parsed|
    # \A and \z anchor on the whole string; ^ and $ would also match around
    # embedded or trailing newlines in an entry name.
    key = entry_name.sub(%r{\Aword/}, '').sub(/\.xml\z/, '')
    parsed[key] = Nokogiri::XML(@zip.read(entry_name))
  end
end