Skip to content

Commit

Permalink
Improve namespace handling in find_replace
Browse files Browse the repository at this point in the history
Refactor find_replace logic to support XML namespaces properly. The update temporarily renames the 'xmlns' attribute to 'xmlnamespace' before parsing and restores it afterwards, so that namespace-agnostic XPath expressions match. Tests added to validate the improved functionality.

Resolves SFDO-Tooling#3771.
  • Loading branch information
leboff committed Apr 12, 2024
1 parent 3020a82 commit 2bf6ce6
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 22 deletions.
19 changes: 19 additions & 0 deletions cumulusci/core/source_transforms/tests/test_transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -883,6 +883,25 @@ def test_xpath_replace_with_exp_and_index(task_context):
zip_assert(builder, modified_zip_content)


def test_xpath_replace_with_exp_and_index_has_xmlns(task_context):
    """Check an xpath replacement on a document whose root declares a default xmlns.

    The xpath combines a predicate (``price>40``) with a positional index
    (``author[2]``). The expected output differs from the input only in that
    the second author of the 49.99 book becomes "Rich Author"; the
    ``xmlns="foobar"`` declaration on the root is expected to be unchanged.
    """
    xml_path = Path("Foo.xml")
    original_xml = '<bookstore xmlns="foobar"> <book category="cooking"> <title lang="en">Everyday Italian</title> <author>Giada De Laurentiis</author> <year>2005</year> <price>30.00</price> </book> <book category="children"> <title lang="en">Harry Potter</title> <author>J K. Rowling</author> <year>2005</year> <price>29.99</price> </book> <book category="web"> <title lang="en">XQuery Kick Start</title> <author>James McGovern</author> <author>Per Bothner</author> <author>Kurt Cagle</author> <author>James Linn</author> <author>Vaidyanathan Nagarajan</author> <year>2003</year> <price>49.99</price> </book> <book category="web"> <title lang="en">Learning XML</title> <author>Erik T. Ray</author> <year>2003</year> <price>39.95</price> </book> </bookstore>'
    expected_xml = '<bookstore xmlns="foobar"> <book category="cooking"> <title lang="en">Everyday Italian</title> <author>Giada De Laurentiis</author> <year>2005</year> <price>30.00</price> </book> <book category="children"> <title lang="en">Harry Potter</title> <author>J K. Rowling</author> <year>2005</year> <price>29.99</price> </book> <book category="web"> <title lang="en">XQuery Kick Start</title> <author>James McGovern</author> <author>Rich Author</author> <author>Kurt Cagle</author> <author>James Linn</author> <author>Vaidyanathan Nagarajan</author> <year>2003</year> <price>49.99</price> </book> <book category="web"> <title lang="en">Learning XML</title> <author>Erik T. Ray</author> <year>2003</year> <price>39.95</price> </book> </bookstore>'
    replacement_specs = [
        {"xpath": "/bookstore/book[price>40]/author[2]", "replace": "Rich Author"}
    ]

    builder = create_builder(
        task_context, {xml_path: original_xml}, replacement_specs
    )

    zip_assert(builder, {xml_path: expected_xml})


def test_xpath_replace_with_exp_and_index2(task_context):
zip_content = {
Path(
Expand Down
25 changes: 3 additions & 22 deletions cumulusci/core/source_transforms/transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import functools
import io
import os
import re
import shutil
import typing as T
import zipfile
Expand Down Expand Up @@ -416,24 +415,6 @@ def __init__(self, options: FindReplaceTransformOptions):
self.options = options

def process(self, zf: ZipFile, context: TaskContext) -> ZipFile:
# To handle xpath with namespaces, without requiring namespace prefixes in the expression
def transform_xpath(expression):
    """Rewrite an XPath so element steps match by local name, ignoring namespaces.

    Each element step ``name[pred]...`` becomes
    ``*[local-name()="name"][pred]...``. Predicate text is copied through
    unchanged, so element names *inside* a predicate are not rewritten.

    Compared with a plain ``expression.split("/")``:

    * ``//`` (descendant-or-self) separators are preserved instead of being
      collapsed to ``/``;
    * a ``/`` inside ``[...]`` or inside a quoted string is not treated as a
      step separator;
    * steps that are not plain element names (``*``, ``.``, ``..``,
      ``@attr``, node tests / function calls such as ``text()``, and explicit
      axes such as ``child::x``) are passed through untouched, since wrapping
      them in ``local-name()="..."`` could never match.

    For backward compatibility, a first step with no leading separator is
    still prefixed with ``/``, and empty input yields an empty string.

    Args:
        expression: The XPath expression to rewrite.

    Returns:
        The rewritten XPath expression as a string.
    """
    # Phase 1: split into (separator, step) pairs, tracking bracket depth and
    # quoting so that only top-level slashes act as separators.
    steps = []
    separator = ""
    current = []
    depth = 0
    quote = None
    for char in expression:
        if quote:
            current.append(char)
            if char == quote:
                quote = None
        elif char == "/" and depth == 0:
            if current:
                steps.append((separator, "".join(current)))
                current = []
                separator = ""
            # A second consecutive slash upgrades the separator to "//".
            separator = "//" if separator else "/"
        else:
            if char in "\"'":
                quote = char
            elif char == "[":
                depth += 1
            elif char == "]":
                # Clamp so a stray "]" cannot disable separator detection.
                depth = max(depth - 1, 0)
            current.append(char)
    if current:
        steps.append((separator, "".join(current)))

    # Phase 2: rewrite element steps, leave everything else as written.
    transformed_parts = []
    for separator, step in steps:
        bracket = step.find("[")
        name = step if bracket == -1 else step[:bracket]
        predicates = "" if bracket == -1 else step[bracket:]
        prefix = separator or "/"
        is_passthrough = (
            name in ("*", ".", "..")
            or name.startswith("@")
            or "(" in name
            or "::" in name
        )
        if is_passthrough:
            transformed_parts.append(prefix + step)
        else:
            transformed_parts.append(
                prefix + '*[local-name()="' + name + '"]' + predicates
            )

    return "".join(transformed_parts)

def process_file(filename: str, content: str) -> T.Tuple[str, str]:
path = Path(filename)
for spec in self.options.patterns:
Expand All @@ -442,6 +423,7 @@ def process_file(filename: str, content: str) -> T.Tuple[str, str]:
):
try:
# See if the content is an xml file
content = content.replace(' xmlns="', ' xmlnamespace="')
content_bytes = content.encode("utf-8")
root = ET.fromstring(content_bytes)

Expand All @@ -460,16 +442,15 @@ def process_file(filename: str, content: str) -> T.Tuple[str, str]:
stack.extend(element)
# Modify the element given by xpath
elif spec.xpath:
transformed_xpath = transform_xpath(spec.xpath)
elements_to_replace = root.xpath(transformed_xpath)
elements_to_replace = root.xpath(spec.xpath)
for element in elements_to_replace:
element.text = spec.get_replace_string(context)

# Add xml declaration back to file, if it initially had xml declaration
content = ET.tostring(
root, encoding="utf-8", xml_declaration=has_xml_declaration
).decode("utf-8")

content = content.replace(' xmlnamespace="', ' xmlns="')
except ET.XMLSyntaxError:
if spec.find:
content = content.replace(
Expand Down

0 comments on commit 2bf6ce6

Please sign in to comment.