How to find and replace ALL text in Word Documents with NPOI? #1186
-
I am attempting to find a set of variables (denoted by square brackets, e.g. [City]) and replace every occurrence of them in a Word document. I created this TestDocument.docx as an example. The FindAndReplaceValues() extension method: public static class XwpfDocumentExtensionMethods
{
/// <summary>
/// Performs a find and replace on a Word document.
/// </summary>
/// <remarks>
/// For every key the built-in document-level replace is attempted first, then the
/// headers, body paragraphs, body tables and footers are each scanned explicitly.
/// Every element is always visited, even after an earlier replacement has succeeded.
/// </remarks>
/// <param name="document">The Word document.</param>
/// <param name="replacementValues">The mapping of values to be replaced within the document.</param>
/// <returns>True if a value was replaced in the file, false otherwise.</returns>
public static bool FindAndReplaceValues(this XWPFDocument document, Dictionary<string, string?> replacementValues)
{
    bool dirty = false;
    foreach (KeyValuePair<string, string?> replacementValue in replacementValues)
    {
        // Use built-in NPOI Find and Replace first.
        // The built-in call reports nothing back, so a change is inferred from the
        // character count before and after. NOTE(review): a replacement whose length
        // equals the key's length is not detected by this particular check.
        long initialCharacterCount = document.CountCharacterLength();
        document.FindAndReplaceText(document, replacementValue.Key, replacementValue.Value);
        long finalCharacterCount = document.CountCharacterLength();
        dirty |= (initialCharacterCount != finalCharacterCount);

        // Do find and replace on elements that NPOI's built-in FindAndReplaceText() method doesn't work on for some reason.
        // `|=` is used deliberately instead of `dirty = dirty || ...`: the short-circuiting
        // form skips the helper call (and therefore the replacement itself) as soon as
        // dirty is already true.
        foreach (XWPFHeader? header in document.HeaderList)
        {
            foreach (XWPFTable? table in header.Tables)
            {
                dirty |= table.FindAndReplaceValue(replacementValue);
            }

            foreach (XWPFParagraph? paragraph in header.Paragraphs)
            {
                dirty |= paragraph.FindAndReplaceValue(replacementValue);
            }
        }

        foreach (XWPFParagraph? paragraph in document.Paragraphs)
        {
            dirty |= paragraph.FindAndReplaceValue(replacementValue);
        }

        foreach (XWPFTable table in document.Tables)
        {
            dirty |= table.FindAndReplaceValue(replacementValue);
        }

        foreach (XWPFFooter? footer in document.FooterList)
        {
            foreach (XWPFTable? table in footer.Tables)
            {
                dirty |= table.FindAndReplaceValue(replacementValue);
            }

            foreach (XWPFParagraph? paragraph in footer.Paragraphs)
            {
                dirty |= paragraph.FindAndReplaceValue(replacementValue);
            }
        }
    }

    return dirty;
}
// Replaces the key with its value in every cell paragraph of the table whose text
// contains the key. Returns true when at least one paragraph contained the key.
private static bool FindAndReplaceValue(this XWPFTable table, KeyValuePair<string, string?> replacementValue)
{
    string placeholder = replacementValue.Key;
    string? substitute = replacementValue.Value;
    bool replacedAny = false;

    foreach (XWPFTableRow? tableRow in table.Rows)
    {
        foreach (XWPFTableCell? tableCell in tableRow.GetTableCells())
        {
            foreach (XWPFParagraph? cellParagraph in tableCell.Paragraphs)
            {
                // Skip paragraphs that do not mention the key at all.
                if (!cellParagraph.Text.Contains(placeholder))
                {
                    continue;
                }

                cellParagraph.ReplaceText(placeholder, substitute);
                replacedAny = true;
            }
        }
    }

    return replacedAny;
}
// Replaces the key with its value in the paragraph when the paragraph text contains
// the key. Returns true when the key was present, false when nothing was done.
private static bool FindAndReplaceValue(this XWPFParagraph paragraph, KeyValuePair<string, string?> replacementValue)
{
    bool containsKey = paragraph.Text.Contains(replacementValue.Key);
    if (!containsKey)
    {
        return false;
    }

    paragraph.ReplaceText(replacementValue.Key, replacementValue.Value);
    return true;
}
// Sums the text lengths of the document's headers, footers, body paragraphs and
// body tables. Used as a cheap before/after fingerprint of the document text.
private static long CountCharacterLength(this XWPFDocument document)
{
    long headerLength = 0;
    foreach (XWPFHeader? documentHeader in document.HeaderList)
    {
        headerLength += documentHeader.Text.Length;
    }

    long footerLength = 0;
    foreach (XWPFFooter documentFooter in document.FooterList)
    {
        footerLength += documentFooter.Text.Length;
    }

    long paragraphLength = 0;
    foreach (XWPFParagraph bodyParagraph in document.Paragraphs)
    {
        paragraphLength += bodyParagraph.ParagraphText.Length;
    }

    long tableLength = 0;
    foreach (XWPFTable? bodyTable in document.Tables)
    {
        tableLength += bodyTable.Text.Length;
    }

    return headerLength + footerLength + paragraphLength + tableLength;
}
} So I end up calling it like this: var replacementValues = new Dictionary<string, string>()
{
{"[City]", "Houston"},
{"[State]", "Texas"},
{"[ProjectNumber]", "123456"},
{"[Project Number]", "123456"}
};
var dirty = document.FindAndReplaceValues(replacementValues); |
Beta Was this translation helpful? Give feedback.
Replies: 3 comments 3 replies
-
It seems like this StackOverflow issue is relevant regarding formatting with runs in Word:
|
Beta Was this translation helpful? Give feedback.
-
Here is the code in NPOI for find and replace. I guess one question I have is whether it covers tables in headers and footers based on it iterating through the root document tables property? Tables can also appear in headers and footers. Just based on how the other code is iterating over elements in this method, I'm wondering if that needs to be enhanced? Either way it still didn't help with my sample document above ☝️. |
Beta Was this translation helpful? Give feedback.
-
I ended up modifying my extension method to process the tables contained in the headers and footers, since that's where most of the replacements needed to happen in my case. As for detecting whether a change was made, I use a hash of the document text so I know whether the document changed. public static class XwpfDocumentExtensionMethods
{
/// <summary>
/// Replaces every key of <paramref name="replacementValues"/> with its value in a Word document.
/// </summary>
/// <param name="document">The Word document to modify.</param>
/// <param name="replacementValues">Pairs of text to search for and the text to put in its place.</param>
/// <returns>
/// True when the hash of the extracted document text differs after processing, false otherwise.
/// </returns>
public static bool FindAndReplaceValues(this XWPFDocument document, Dictionary<string, string?> replacementValues)
{
    // Fingerprint the text up front so it can be compared once all replacements are done.
    XWPFWordExtractor textExtractor = new XWPFWordExtractor(document);
    string initialThumbprint = textExtractor.Text.GenerateHash();

    foreach (KeyValuePair<string, string?> pair in replacementValues)
    {
        // Built-in NPOI find and replace goes first.
        document.FindAndReplaceText(document, pair.Key, pair.Value);

        // Header tables are handled explicitly because the built-in pass doesn't scan them.
        foreach (XWPFHeader? documentHeader in document.HeaderList)
        {
            foreach (XWPFTable? headerTable in documentHeader.Tables)
            {
                headerTable.FindAndReplaceValue(pair);
            }
        }

        // Footer tables get the same treatment.
        foreach (XWPFFooter? documentFooter in document.FooterList)
        {
            foreach (XWPFTable? footerTable in documentFooter.Tables)
            {
                footerTable.FindAndReplaceValue(pair);
            }
        }
    }

    string finalThumbprint = textExtractor.Text.GenerateHash();
    return initialThumbprint != finalThumbprint;
}
// Runs ReplaceText(key, value) on every paragraph of every cell in the table.
private static void FindAndReplaceValue(this XWPFTable table, KeyValuePair<string, string?> replacementValue)
{
    string placeholder = replacementValue.Key;
    string? substitute = replacementValue.Value;

    foreach (XWPFTableRow? tableRow in table.Rows)
    {
        foreach (XWPFTableCell? tableCell in tableRow.GetTableCells())
        {
            foreach (XWPFParagraph? cellParagraph in tableCell.Paragraphs)
            {
                cellParagraph.ReplaceText(placeholder, substitute);
            }
        }
    }
}
} |
Beta Was this translation helpful? Give feedback.
I ended up modifying my extension method to process the tables contained in the headers and footers, since that's where most of the replacements needed to happen in my case. As for detecting whether a change was made, I use a hash of the document text so I know whether the document changed.