Skip to content

Commit

Permalink
docx: make docxStyle work in header/footer of docx document
Browse files Browse the repository at this point in the history
fix #994
  • Loading branch information
bjrmatos committed Nov 14, 2022
1 parent 9249bd9 commit 0606fb0
Show file tree
Hide file tree
Showing 8 changed files with 330 additions and 79 deletions.
2 changes: 1 addition & 1 deletion packages/jsreport-docx/lib/postprocess/postprocess.js
Expand Up @@ -21,7 +21,7 @@ module.exports = async (files, options) => {
await bookmark(files, headerFooterRefs, newBookmarksMap)
await watermark(files)
await pageBreak(files)
style(files)
style(files, headerFooterRefs)
await drawingObject(files, headerFooterRefs, newBookmarksMap, options)
link(files)
form(files)
Expand Down
138 changes: 89 additions & 49 deletions packages/jsreport-docx/lib/postprocess/style.js
Expand Up @@ -3,6 +3,95 @@ const { nodeListToArray, serializeXml } = require('../utils')

// see preprocess/style.js for some explanation

module.exports = (files, headerFooterRefs) => {
const documentFile = files.find(f => f.path === 'word/document.xml')

documentFile.data = documentFile.data.replace(/<docxStyles[^/]*\/>.*?(?=<docxStyleEnd\/>)<docxStyleEnd\/>/g, (val) => {
// no need to pass xml namespaces here because the nodes there are just used for reads,
// and are not inserted (re-used) somewhere else
const doc = new DOMParser().parseFromString(`<docxXml>${val}</docxXml>`)
const docxStylesEl = doc.getElementsByTagName('docxStyles')[0]
const docxStyleEndEl = doc.getElementsByTagName('docxStyleEnd')[0]
const runEls = doc.getElementsByTagName('w:r')

processDocxStylesEl(docxStylesEl, docxStyleEndEl, runEls, doc)

return serializeXml(doc).replace('<docxXml>', '').replace('</docxXml>', '')
})

for (const { doc: headerFooterDoc } of headerFooterRefs) {
const docxStylesEls = nodeListToArray(headerFooterDoc.getElementsByTagName('docxStyles'))

for (const docxStylesEl of docxStylesEls) {
let currentEl = docxStylesEl.nextSibling
let docxStyleEndEl
const middleEls = []
const runEls = []

if (currentEl != null) {
do {
if (currentEl.nodeName === 'docxStyleEnd') {
docxStyleEndEl = currentEl
currentEl = null
} else {
middleEls.push(currentEl)
currentEl = currentEl.nextSibling
}
} while (currentEl != null)
}

if (docxStyleEndEl == null) {
throw new Error('Could not find docxStyleEnd element for docxStyle processing')
}

for (const el of middleEls) {
const currentREls = nodeListToArray(el.getElementsByTagName('w:r'))
runEls.push(...currentREls)
}

processDocxStylesEl(docxStylesEl, docxStyleEndEl, runEls, headerFooterDoc)
}
}
}

/**
 * Applies the styles described by the children of `docxStylesEl` to the runs
 * located between a `$docxStyleStart<id>$` marker and the next `$docxStyleEnd`
 * marker, strips those markers from the text and finally removes both helper
 * elements from their parents.
 *
 * Only the first w:t of each run is inspected; runs without a w:t are ignored.
 *
 * @param {Element} docxStylesEl - element whose child elements carry an `id`
 *   attribute identifying each style definition
 * @param {Element} docxStyleEndEl - element that closes the styled region
 * @param {ArrayLike<Element>} runEls - the w:r elements of the region, in document order
 * @param {Document} doc - document passed through to color()
 */
function processDocxStylesEl (docxStylesEl, docxStyleEndEl, runEls, doc) {
  const startMark = '$docxStyleStart'
  const endMark = '$docxStyleEnd'
  let started = false
  let currentStyleEl

  for (let i = 0; i < runEls.length; i++) {
    const wR = runEls[i]
    const ts = wR.getElementsByTagName('w:t')

    if (ts.length === 0) {
      continue
    }

    const textEl = ts[0]

    if (started === false && textEl.textContent.includes(startMark)) {
      started = true

      const text = textEl.textContent
      const startIdx = text.indexOf(startMark)
      const idStartIdx = startIdx + startMark.length
      // search the closing "$" only after the marker, the text that precedes
      // the marker may contain "$" characters of its own
      const idEndIdx = text.indexOf('$', idStartIdx)

      if (idEndIdx === -1) {
        // unterminated marker: drop the marker itself, there is no id to resolve
        textEl.textContent = text.slice(0, startIdx) + text.slice(idStartIdx)
        currentStyleEl = undefined
      } else {
        const id = text.slice(idStartIdx, idEndIdx)

        // cut exactly the "<marker><id>$" span instead of replacing the first
        // "<id>$" occurrence found anywhere in the text
        textEl.textContent = text.slice(0, startIdx) + text.slice(idEndIdx + 1)

        // child nodes that are not elements (text, comments) have no getAttribute
        currentStyleEl = nodeListToArray(docxStylesEl.childNodes).find((n) => (
          typeof n.getAttribute === 'function' && n.getAttribute('id') === id
        ))
      }
    }

    if (textEl.textContent.includes(endMark)) {
      textEl.setAttribute('xml:space', 'preserve')
      started = false
      textEl.textContent = textEl.textContent.replace(endMark, '')
      // the run holding the end marker is still part of the styled region
      color(doc, wR, currentStyleEl)
      continue
    }

    if (started === true) {
      textEl.setAttribute('xml:space', 'preserve')
      color(doc, wR, currentStyleEl)
    }
  }

  docxStylesEl.parentNode.removeChild(docxStylesEl)
  docxStyleEndEl.parentNode.removeChild(docxStyleEndEl)
}

function color (doc, wR, currentStyleEl) {
let wp = wR.parentNode

Expand Down Expand Up @@ -67,52 +156,3 @@ function color (doc, wR, currentStyleEl) {
color.removeAttribute('w:themeColor')
}
}

// Single-argument variant of the style post-processing: it only handles
// 'word/document.xml'. Every "<docxStyles .../> ... <docxStyleEnd/>" region found
// in the xml string is parsed, the runs between the $docxStyleStart<id>$ and
// $docxStyleEnd text markers are passed to color(), the markers and both helper
// elements are removed, and the region is serialized back into the string.
module.exports = (files) => {
const documentFile = files.find(f => f.path === 'word/document.xml')

documentFile.data = documentFile.data.replace(/<docxStyles[^/]*\/>.*?(?=<docxStyleEnd\/>)<docxStyleEnd\/>/g, (val) => {
// no need to pass xml namespaces here because the nodes there are just used for reads,
// and are not inserted (re-used) somewhere else
const doc = new DOMParser().parseFromString('<docxXml>' + val + '</docxXml>')
const docxStylesEl = doc.getElementsByTagName('docxStyles')[0]

const wrs = doc.getElementsByTagName('w:r')
// `started` is true while the loop is between a start marker and its end marker
let started = false
let currentStyleEl
for (let i = 0; i < wrs.length; i++) {
const wR = wrs[i]
const ts = wR.getElementsByTagName('w:t')
// runs without text cannot carry markers and are never colored
if (ts.length === 0) {
continue
}

if (started === false && ts[0].textContent.includes('$docxStyleStart')) {
started = true
ts[0].textContent = ts[0].textContent.replace('$docxStyleStart', '')
// the id is read from index 0 up to the first "$", so this expects the
// marker to be at the very beginning of the text
const id = ts[0].textContent.substring(0, ts[0].textContent.indexOf('$'))
ts[0].textContent = ts[0].textContent.replace(id + '$', '')
// pick the style definition whose id attribute matches the marker id
currentStyleEl = nodeListToArray(docxStylesEl.childNodes).find(n => n.getAttribute('id') === id)
}

if (ts[0].textContent.includes('$docxStyleEnd')) {
ts[0].setAttribute('xml:space', 'preserve')
started = false
ts[0].textContent = ts[0].textContent.replace('$docxStyleEnd', '')
// the run that holds the end marker still gets the style
color(doc, wR, currentStyleEl)
continue
}

if (started === true) {
ts[0].setAttribute('xml:space', 'preserve')
color(doc, wR, currentStyleEl)
}
}

// the helper elements must not be part of the final document
docxStylesEl.parentNode.removeChild(docxStylesEl)
const docxStyleEndEl = doc.getElementsByTagName('docxStyleEnd')[0]
docxStyleEndEl.parentNode.removeChild(docxStyleEndEl)

// drop the temporary <docxXml> root that was only added for parsing
return serializeXml(doc).replace('<docxXml>', '').replace('</docxXml>', '')
})
}
72 changes: 47 additions & 25 deletions packages/jsreport-docx/lib/preprocess/style.js
@@ -1,3 +1,5 @@
const { nodeListToArray, normalizeSingleTextElInRun, normalizeSingleContentInText } = require('../utils')

// the specification of {{docxStyle}} is very rich
// it should add color to all text between the docxStyle tag
// the problematic part is docxStyle crossing multiple paragraphs and ending in the middle
Expand All @@ -12,34 +14,36 @@ const regexp = /{{#docxStyle [^{}]{0,500}}}/

let styleIdCounter = 1

function processClosingTag (doc, openingEl, el, stylesEl) {
const styleId = styleIdCounter++
const helperCall = openingEl.textContent.match(regexp)[0]
module.exports = (files) => {
for (const f of files.filter(f => f.path.endsWith('.xml'))) {
const doc = f.doc

const fakeElement = doc.createElement('docxRemove')
fakeElement.textContent = helperCall.replace('{{#docxStyle', `{{docxStyle id=${styleId}`)
stylesEl.appendChild(fakeElement)
const textElementsWithDocxStyle = nodeListToArray(doc.getElementsByTagName('w:t')).filter((tEl) => {
return tEl.textContent.includes('{{#docxStyle') || tEl.textContent.includes('{{/docxStyle}}')
})

openingEl.textContent = openingEl.textContent.replace(regexp, `$docxStyleStart${styleId}$`)
el.textContent = el.textContent.replace('{{/docxStyle}}', '$docxStyleEnd')
// first we normalize the w:r elements containing docxStyle calls so that each one contains a single child w:t element.
// office usually does not generate documents like this, but it is valid for
// a w:r element to contain multiple w:t elements
for (const textEl of textElementsWithDocxStyle) {
normalizeSingleTextElInRun(textEl, doc)
}

const wpElement = el.parentNode.parentNode
if (!wpElement.nextSibling || wpElement.nextSibling.tagName !== 'docxStyleEnd') {
const docxStyleEnd = doc.createElement('docxStyleEnd')
wpElement.parentNode.insertBefore(docxStyleEnd, wpElement.nextSibling)
}
}
// now we normalize so that each docxStyle call is in its own w:t element and any other text
// is split into new w:t elements

module.exports = (files) => {
for (const f of files.filter(f => f.path.endsWith('.xml'))) {
const doc = f.doc
const elements = doc.getElementsByTagName('w:t')
for (const textEl of textElementsWithDocxStyle) {
normalizeSingleContentInText(textEl, getDocxStyleCallRegexp, doc)
}

// we query the text elements again after the normalization
const textElements = nodeListToArray(doc.getElementsByTagName('w:t'))

let openingEl = null
let docxStylesEl = null

for (let i = 0; i < elements.length; i++) {
const el = elements[i]
for (let i = 0; i < textElements.length; i++) {
const el = textElements[i]

if (el.textContent.includes('{{/docxStyle}}') && openingEl) {
processClosingTag(doc, openingEl, el, docxStylesEl)
Expand All @@ -56,12 +60,30 @@ module.exports = (files) => {
docxStylesEl = doc.createElement('docxStyles')
wpElement.parentNode.insertBefore(docxStylesEl, wpElement)
}

if (el.textContent.includes('{{/docxStyle')) {
processClosingTag(doc, openingEl, el, docxStylesEl)
openingEl = null
}
}
}
}
}

/**
 * Converts one matched {{#docxStyle ...}} / {{/docxStyle}} pair into text markers.
 *
 * - takes the next value of the module-level style id counter
 * - appends a <docxRemove> element to `stylesEl` whose text is the opening call
 *   rewritten as a non-block call carrying that id
 * - replaces the opening call with `$docxStyleStart<id>$` and the closing tag
 *   with `$docxStyleEnd`
 * - inserts a <docxStyleEnd> element right after the grandparent of `el`, unless
 *   the next sibling is already a docxStyleEnd
 *
 * @param {Document} doc - document used to create the helper elements
 * @param {Element} openingEl - text element containing the opening call
 * @param {Element} el - text element containing the closing tag
 *   (its grandparent is presumably the w:p paragraph, i.e. w:t -> w:r -> w:p)
 * @param {Element} stylesEl - container that collects the style definitions
 */
function processClosingTag (doc, openingEl, el, stylesEl) {
  const id = styleIdCounter++
  const matched = openingEl.textContent.match(regexp)
  const openingCall = matched[0]

  // keep the helper call (with its id) so it can be evaluated later
  const definitionEl = doc.createElement('docxRemove')
  definitionEl.textContent = openingCall.replace('{{#docxStyle', `{{docxStyle id=${id}`)
  stylesEl.appendChild(definitionEl)

  // leave plain text markers where the block helper tags were
  openingEl.textContent = openingEl.textContent.replace(regexp, `$docxStyleStart${id}$`)
  el.textContent = el.textContent.replace('{{/docxStyle}}', '$docxStyleEnd')

  const containerEl = el.parentNode.parentNode
  const followingEl = containerEl.nextSibling
  const endAlreadyPresent = followingEl && followingEl.tagName === 'docxStyleEnd'

  if (endAlreadyPresent) {
    return
  }

  const endEl = doc.createElement('docxStyleEnd')
  containerEl.parentNode.insertBefore(endEl, followingEl)
}

/**
 * Builds the pattern that recognizes a single docxStyle helper tag, either the
 * opening form ({{#docxStyle ...}}) or the closing form ({{/docxStyle}}).
 *
 * @returns {RegExp} a new, flag-less regular expression on every call
 */
function getDocxStyleCallRegexp () {
  const docxStyleCallPattern = /{{[#/]docxStyle ?[^{}]{0,500}}}/

  return docxStyleCallPattern
}
8 changes: 4 additions & 4 deletions packages/jsreport-docx/test/imageTest.js
Expand Up @@ -668,8 +668,8 @@ describe('docx image', () => {

const withImageInHeader = []

for (const footerPath of ['word/header1.xml', 'word/header2.xml', 'word/header3.xml']) {
const outputInHeaderImageSize = await getImageSize(result.content, footerPath)
for (const headerPath of ['word/header1.xml', 'word/header2.xml', 'word/header3.xml']) {
const outputInHeaderImageSize = await getImageSize(result.content, headerPath)

if (outputInHeaderImageSize == null) {
continue
Expand Down Expand Up @@ -787,8 +787,8 @@ describe('docx image', () => {

const withImageInHeader = []

for (const footerPath of ['word/header1.xml', 'word/header2.xml', 'word/header3.xml']) {
const outputInHeaderImageSize = await getImageSize(result.content, footerPath)
for (const headerPath of ['word/header1.xml', 'word/header2.xml', 'word/header3.xml']) {
const outputInHeaderImageSize = await getImageSize(result.content, headerPath)

if (outputInHeaderImageSize == null) {
continue
Expand Down
Binary file added packages/jsreport-docx/test/style-footer.docx
Binary file not shown.
Binary file not shown.
Binary file added packages/jsreport-docx/test/style-header.docx
Binary file not shown.

0 comments on commit 0606fb0

Please sign in to comment.