Skip to content

Commit

Permalink
Allow access to other XML docs in docx file like the headers and footers
Browse files Browse the repository at this point in the history
  • Loading branch information
yjukaku committed Oct 22, 2019
1 parent 8c40b4c commit 990de9c
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 6 deletions.
57 changes: 51 additions & 6 deletions lib/docx/document.rb
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,24 @@ module Docx
# puts d.text
# end
class Document
attr_reader :xml, :doc, :zip, :styles

# Maps each reader attribute to the path of its XML part inside the docx
# archive. A path with * indicates that there are possibly multiple documents
# matching that glob, eg. word/header1.xml, word/header2.xml
# Frozen because the table is shared by both loading and saving and must not
# be mutated at runtime.
DOCUMENT_PATHS = {
  doc: "word/document.xml",
  styles: "word/styles.xml",
  headers: "word/header*.xml",
  footers: "word/footer*.xml",
  numbering: "word/numbering.xml"
}.freeze

attr_reader :xml, :doc, :zip, :styles, :headers, :footers, :numbering

def initialize(path, &block)
@replace = {}
@zip = Zip::File.open(path)
@document_xml = @zip.read('word/document.xml')
@doc = Nokogiri::XML(@document_xml)
@styles_xml = @zip.read('word/styles.xml')
@styles = Nokogiri::XML(@styles_xml)
extract_documents

if block_given?
yield self
@zip.close
Expand Down Expand Up @@ -123,13 +132,49 @@ def replace_entry(entry_path, file_contents)

private

# Loads every XML part listed in DOCUMENT_PATHS. Paths containing a * are
# handed to the glob loader (possibly several files per attribute); all other
# paths are handed to the single-document loader.
def extract_documents
  DOCUMENT_PATHS.each do |attr_name, path|
    # Plain substring test: no regexp needed, and it avoids the ambiguous
    # paren-less `match /.../` call form.
    if path.include?("*")
      extract_multiple_documents_from_globbed_path(attr_name, path)
    else
      extract_single_document_from_path(attr_name, path)
    end
  end
end

# Parses the archive entry at +path+ with Nokogiri and stores the result in
# the instance variable named after +attr_name+. When the archive has no such
# entry, nothing is assigned and nil is returned.
def extract_single_document_from_path(attr_name, path)
  return unless @zip.find_entry(path)

  raw_xml = @zip.read(path)
  instance_variable_set(:"@#{attr_name}", Nokogiri::XML(raw_xml))
end

# Collects every archive entry matching +glob_path+ into a Hash and stores it
# in the instance variable named after +hash_attr_name+. Keys are the entry
# names with a leading "word/" and a trailing ".xml" removed (eg. "header1");
# values are the Nokogiri-parsed documents. An empty Hash is stored when
# nothing matches.
def extract_multiple_documents_from_globbed_path(hash_attr_name, glob_path)
  documents_by_name = {}
  @zip.glob(glob_path).each do |entry|
    entry_path = entry.name
    short_name = entry_path.sub(/^word\//, "").sub(/\.xml$/, "")
    documents_by_name[short_name] = Nokogiri::XML(@zip.read(entry_path))
  end
  instance_variable_set(:"@#{hash_attr_name}", documents_by_name)
end

#--
# TODO: Method to set flag on files that have been edited, probably by inserting something at the
# end of methods that make edits?
#++
# Serializes every loaded XML part back into its archive entry via
# replace_entry. Globbed attributes (paths containing *) hold a Hash of
# file name => document and each member is written to word/<name>.xml;
# single-path attributes are written to their own path and skipped when the
# part was never loaded (nil).
def update
  # word/document.xml is covered by the :doc entry of DOCUMENT_PATHS, so it is
  # not written separately here; doing so would replace the entry twice.
  DOCUMENT_PATHS.each do |attr_name, path|
    loaded = instance_variable_get("@#{attr_name}")
    if path.include?("*")
      loaded.each do |simple_file_name, contents|
        replace_entry("word/#{simple_file_name}.xml", contents.serialize(:save_with => 0))
      end
    elsif loaded
      replace_entry(path, loaded.serialize(:save_with => 0))
    end
  end
end

# generate Elements::Containers::Paragraph from paragraph XML node
Expand Down
16 changes: 16 additions & 0 deletions spec/docx/document_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,22 @@
end
end

# Checks that opening a docx exposes each XML part the document tracks.
describe 'multiple documents' do
  before do
    fixture = @fixtures_path + '/multi_doc.docx'
    @doc = Docx::Document.open(fixture)
  end

  it 'should extract all inner documents' do
    # Every tracked part must have been loaded.
    %i[doc styles headers footers numbering].each do |part|
      expect(@doc.public_send(part)).not_to be_nil
    end

    # Header and footer parts are looked up by file name without extension.
    expect(@doc.headers["header1"].text).to eq "Hello from the header."
    expect(@doc.footers["footer1"].text).to eq "Hello from the footer."
  end
end

describe 'saving' do
before do
@doc = Docx::Document.open(@fixtures_path + '/saving.docx')
Expand Down
Binary file added spec/fixtures/multi_doc.docx
Binary file not shown.

0 comments on commit 990de9c

Please sign in to comment.