Improve namespace handling in find_replace

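Refactor the find_replace logic to handle XML namespaces properly. Before parsing, the default-namespace attribute 'xmlns' is temporarily renamed to 'xmlnamespace', so that plain XPath expressions match elements without namespace qualification; the attribute is renamed back to 'xmlns' after the document is serialized. This replaces the previous approach of rewriting each XPath step into a local-name() test. Tests were added to validate the improved functionality. Resolves SFDO-Tooling#3771.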
leboff · Apr 12, 2024 · 2bf6ce6 · 2bf6ce6
1 parent 3020a82
commit 2bf6ce6
Show file tree

Hide file tree

Showing 2 changed files with 22 additions and 22 deletions.
diff --git a/cumulusci/core/source_transforms/tests/test_transforms.py b/cumulusci/core/source_transforms/tests/test_transforms.py
@@ -883,6 +883,25 @@ def test_xpath_replace_with_exp_and_index(task_context):
     zip_assert(builder, modified_zip_content)
 
 
+def test_xpath_replace_with_exp_and_index_has_xmlns(task_context):
+    zip_content = {
+        Path(
+            "Foo.xml"
+        ): '<bookstore xmlns="foobar"> <book category="cooking"> <title lang="en">Everyday Italian</title> <author>Giada De Laurentiis</author> <year>2005</year> <price>30.00</price> </book> <book category="children"> <title lang="en">Harry Potter</title> <author>J K. Rowling</author> <year>2005</year> <price>29.99</price> </book> <book category="web"> <title lang="en">XQuery Kick Start</title> <author>James McGovern</author> <author>Per Bothner</author> <author>Kurt Cagle</author> <author>James Linn</author> <author>Vaidyanathan Nagarajan</author> <year>2003</year> <price>49.99</price> </book> <book category="web"> <title lang="en">Learning XML</title> <author>Erik T. Ray</author> <year>2003</year> <price>39.95</price> </book> </bookstore>',
+    }
+    patterns = [
+        {"xpath": "/bookstore/book[price>40]/author[2]", "replace": "Rich Author"}
+    ]
+    builder = create_builder(task_context, zip_content, patterns)
+
+    modified_zip_content = {
+        Path(
+            "Foo.xml"
+        ): '<bookstore xmlns="foobar"> <book category="cooking"> <title lang="en">Everyday Italian</title> <author>Giada De Laurentiis</author> <year>2005</year> <price>30.00</price> </book> <book category="children"> <title lang="en">Harry Potter</title> <author>J K. Rowling</author> <year>2005</year> <price>29.99</price> </book> <book category="web"> <title lang="en">XQuery Kick Start</title> <author>James McGovern</author> <author>Rich Author</author> <author>Kurt Cagle</author> <author>James Linn</author> <author>Vaidyanathan Nagarajan</author> <year>2003</year> <price>49.99</price> </book> <book category="web"> <title lang="en">Learning XML</title> <author>Erik T. Ray</author> <year>2003</year> <price>39.95</price> </book> </bookstore>',
+    }
+    zip_assert(builder, modified_zip_content)
+
+
 def test_xpath_replace_with_exp_and_index2(task_context):
     zip_content = {
         Path(

diff --git a/cumulusci/core/source_transforms/transforms.py b/cumulusci/core/source_transforms/transforms.py
@@ -2,7 +2,6 @@
 import functools
 import io
 import os
-import re
 import shutil
 import typing as T
 import zipfile
@@ -416,24 +415,6 @@ def __init__(self, options: FindReplaceTransformOptions):
         self.options = options
 
     def process(self, zf: ZipFile, context: TaskContext) -> ZipFile:
-        # To handle xpath with namespaces, without
-        def transform_xpath(expression):
-            predicate_pattern = re.compile(r"\[.*?\]")
-            parts = expression.split("/")
-            transformed_parts = []
-
-            for part in parts:
-                if part:
-                    predicates = predicate_pattern.findall(part)
-                    tag = predicate_pattern.sub("", part)
-                    transformed_part = '/*[local-name()="' + tag + '"]'
-                    for predicate in predicates:
-                        transformed_part += predicate
-                    transformed_parts.append(transformed_part)
-            transformed_expression = "".join(transformed_parts)
-
-            return transformed_expression
-
         def process_file(filename: str, content: str) -> T.Tuple[str, str]:
             path = Path(filename)
             for spec in self.options.patterns:
@@ -442,6 +423,7 @@ def process_file(filename: str, content: str) -> T.Tuple[str, str]:
                 ):
                     try:
                         # See if the content is an xml file
+                        content = content.replace(' xmlns="', ' xmlnamespace="')
                         content_bytes = content.encode("utf-8")
                         root = ET.fromstring(content_bytes)
 
@@ -460,16 +442,15 @@ def process_file(filename: str, content: str) -> T.Tuple[str, str]:
                                 stack.extend(element)
                         # Modify the element given by xpath
                         elif spec.xpath:
-                            transformed_xpath = transform_xpath(spec.xpath)
-                            elements_to_replace = root.xpath(transformed_xpath)
+                            elements_to_replace = root.xpath(spec.xpath)
                             for element in elements_to_replace:
                                 element.text = spec.get_replace_string(context)
 
                         # Add xml declaration back to file, if it initially had xml declaration
                         content = ET.tostring(
                             root, encoding="utf-8", xml_declaration=has_xml_declaration
                         ).decode("utf-8")
-
+                        content = content.replace(' xmlnamespace="', ' xmlns="')
                     except ET.XMLSyntaxError:
                         if spec.find:
                             content = content.replace(