Merge pull request #3594 from raspberrypi/lxml-update

nelliemckesson · web-flow · commit 9c658d3097b3 · 2024-04-30T08:36:14.000-07:00
Update lxml version and fix some syntax warnings
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -40,13 +40,15 @@ jobs:
         uses: carlosperate/arm-none-eabi-gcc-action@v1.8.1
       - name: Install Doxygen
         run: |
-          wget https://www.doxygen.nl/files/doxygen-1.9.6.linux.bin.tar.gz
-          tar xf doxygen-1.9.6.linux.bin.tar.gz -C "$HOME"
-          echo "$HOME/doxygen-1.9.6/bin" >> $GITHUB_PATH
+          wget https://www.doxygen.nl/files/doxygen-1.10.0.linux.bin.tar.gz
+          tar xf doxygen-1.10.0.linux.bin.tar.gz -C "$HOME"
+          echo "$HOME/doxygen-1.10.0/bin" >> $GITHUB_PATH
       - name: Build Doxygen documentation
         run: make build_doxygen_adoc
       - name: Build documentation
         run: make -j 2
+      - name: Run tests
+        run: ./tests/run_documentation_tests.sh
       - name: Deploy to Mythic Beasts
         if: ${{ github.ref == 'refs/heads/master' }}
         uses: ./.github/actions/deploy-action
diff --git a/requirements.txt b/requirements.txt
@@ -1,2 +1,2 @@
 pyyaml == 6.0.1
-lxml == 4.9.3
+lxml
diff --git a/scripts/create_build_adoc_doxygen.py b/scripts/create_build_adoc_doxygen.py
@@ -10,13 +10,13 @@
 def check_no_markdown(filename):
     with open(filename) as fh:
         asciidoc = fh.read()
-        if re.search('```\n.*?\n```', asciidoc):
+        if re.search(r'```\n.*?\n```', asciidoc):
             raise Exception("{} uses triple-backticks for markup - please use four-hyphens instead".format(filename))
         # strip out code blocks
-        asciidoc = re.sub('----\n.*?\n----', '', asciidoc, flags=re.DOTALL)
+        asciidoc = re.sub(r'----\n.*?\n----', '', asciidoc, flags=re.DOTALL)
         # strip out pass-through blocks
-        asciidoc = re.sub('\+\+\+\+\n.*?\n\+\+\+\+', '', asciidoc, flags=re.DOTALL)
-        if re.search('(?:^|\n)#+', asciidoc):
+        asciidoc = re.sub(r'\+\+\+\+\n.*?\n\+\+\+\+', '', asciidoc, flags=re.DOTALL)
+        if re.search(r'(?:^|\n)#+', asciidoc):
             raise Exception("{} contains a Markdown-style header (i.e. '#' rather than '=')".format(filename))
         if re.search(r'(\[.+?\]\(.+?\))', asciidoc):
             raise Exception("{} contains a Markdown-style link (i.e. '[title](url)' rather than 'url[title]')".format(filename))
@@ -67,7 +67,7 @@ def check_no_markdown(filename):
                 if not seen_header:
                     seen_header = True
             else:
-                m = re.match('^(include::)(.+)(\[\]\n?)$', line)
+                m = re.match(r'^(include::)(.+)(\[\]\n?)$', line)
                 if m:
                     line = m.group(1) + os.path.join('{includedir}/{parentdir}', m.group(2)) + m.group(3)
             new_contents += line
diff --git a/scripts/create_output_supplemental_data.py b/scripts/create_output_supplemental_data.py
@@ -9,7 +9,7 @@ def get_release_version(doxyfile_path):
 	version = "unknown"
 	with open(doxyfile_path) as f:
 		doxy_content = f.read()
-	version_search = re.search("(\nPROJECT_NUMBER\s*=\s*)([\d.]+)", doxy_content)
+	version_search = re.search(r"(\nPROJECT_NUMBER\s*=\s*)([\d.]+)", doxy_content)
 	if version_search is not None:
 		version = version_search.group(2)
 	return version
diff --git a/scripts/tests/test_doxygen_adoc.py b/scripts/tests/test_doxygen_adoc.py
@@ -40,31 +40,31 @@ def test_doxygen_adoc_variables(self):
 		# test will fail if ANY of the below are different or missing
 		expected = {
 			"pico-sdk/index_doxygen.adoc" : [
-				":doctitle: Raspberry Pi Documentation - Introduction",
+				":doctitle: Introduction - Raspberry Pi Documentation",
 				":page-sub_title: Introduction"
 			],
 			"pico-sdk/hardware.adoc": [
-				":doctitle: Raspberry Pi Documentation - Hardware APIs",
+				":doctitle: Hardware APIs - Raspberry Pi Documentation",
 				":page-sub_title: Hardware APIs"
 			],
 			"pico-sdk/high_level.adoc": [
-				":doctitle: Raspberry Pi Documentation - High Level APIs",
+				":doctitle: High Level APIs - Raspberry Pi Documentation",
 				":page-sub_title: High Level APIs"
 			],
 			"pico-sdk/third_party.adoc": [
-				":doctitle: Raspberry Pi Documentation - Third-party Libraries",
+				":doctitle: Third-party Libraries - Raspberry Pi Documentation",
 				":page-sub_title: Third-party Libraries"
 			],
 			"pico-sdk/networking.adoc": [
-				":doctitle: Raspberry Pi Documentation - Networking Libraries",
+				":doctitle: Networking Libraries - Raspberry Pi Documentation",
 				":page-sub_title: Networking Libraries"
 			],
 			"pico-sdk/runtime.adoc": [
-				":doctitle: Raspberry Pi Documentation - Runtime Infrastructure",
+				":doctitle: Runtime Infrastructure - Raspberry Pi Documentation",
 				":page-sub_title: Runtime Infrastructure"
 			],
 			"pico-sdk/misc.adoc": [
-				":doctitle: Raspberry Pi Documentation - External API Headers",
+				":doctitle: External API Headers - Raspberry Pi Documentation",
 				":page-sub_title: External API Headers"
 			]
 		}
diff --git a/scripts/transform_doxygen_html.py b/scripts/transform_doxygen_html.py
@@ -14,6 +14,12 @@
 # TO DO:
 # do internal href links need to be updated?
 
+def add_next_with_tail(target, inserted):  # insert `inserted` right after `target`, moving target's tail text onto `inserted`
+  if target.tail is not None:  # append target's tail to any tail `inserted` already has (NOTE(review): presumably compensates for addnext() tail handling in the updated lxml - confirm)
+    inserted.tail = target.tail if inserted.tail is None else inserted.tail + target.tail
+  target.tail = None
+  target.addnext(inserted)
+
 def get_all_text(node):
   text = node.text if node.text else None
   if text:
@@ -69,7 +75,7 @@ def make_attribute_selector(sel, item):
         if "*" in att_value:
           contains = True
       if contains == True:
-        val = re.sub("\*", "", " ".join(att["value"]))
+        val = re.sub(r"\*", "", " ".join(att["value"]))
         atts.append("contains(@" + att["name"] + ",'" + val + "')")
       else:
         # otherwise it's a normal attribute selector
@@ -200,16 +206,16 @@ def transform_element(item, root, is_child=False):
           # set attributes, add text/tail, and add children
           new_tree = add_content_to_tree(new_tree, match)
           # add the new tree to the document
-          match.addnext(new_tree)
+          add_next_with_tail(match, new_tree)
           # remove the old element
           match.getparent().remove(match)
         else:
           # if there is no tree, the element should be removed
           # first, preserve any children:
           for child in reversed(match.findall("./*")):
-            match.addnext(child)
+            add_next_with_tail(match, child)
           # handle the tail if needed
-          if match.tail is not None and re.search("\S", match.tail) is not None:
+          if match.tail is not None and re.search(r"\S", match.tail) is not None:
             prev = match.getprevious()
             if prev is not None:
               prev.tail = prev.tail + match.tail if prev.tail is not None else match.tail
@@ -309,7 +315,7 @@ def find_item_in_toc(h_json, filename):
 
 def fix_external_links(adoc, h_json):
   try:
-    matches = re.findall("(href=[\"'])([^\s>]*?)([\"'])", adoc)
+    matches = re.findall(r"(href=[\"'])([^\s>]*?)([\"'])", adoc)
     for match in matches:
       href = match[1]
       # href = match.get("href")
@@ -452,7 +458,7 @@ def retag_heading(head, headtype):
     else:
       newel.set("id", head.get("id"))
     newel.text = text
-    head.addnext(newel)
+    add_next_with_tail(head, newel)
     head.getparent().remove(head)
   except Exception as e:
     exc_type, exc_obj, exc_tb = sys.exc_info()
@@ -475,11 +481,11 @@ def prep_for_adoc(root):
 def make_adoc(root_string, title_text, filename):
   try:
     my_id = make_filename_id(filename)
-    root_string = re.sub("<\/div>\s*?$", "", root_string, flags=re.S)
-    root_string = re.sub('<div class="contents" id="\S*?">', "", root_string)
+    root_string = re.sub(r"<\/div>\s*?$", "", root_string, flags=re.S)
+    root_string = re.sub(r'<div class="contents" id="\S*?">', "", root_string)
     root_string = "[["+my_id+"]]\n== " + title_text + "\n\n++++\n" + root_string
-    root_string = re.sub('(<p[^>]+class="adoc-h2"[^>]*id=")([^"]+)("[^>]*>\s*)(.*?)(<\/p>)', '\n++++\n\n[[\\2]]\n=== \\4\n\n++++\n', root_string, flags=re.S)
-    root_string = re.sub('(<p[^>]+class="adoc-h3"[^>]*id=")([^"]+)("[^>]*>\s*)(.*?)(<\/p>)', '\n++++\n\n[[\\2]]\n==== \\4\n\n++++\n', root_string, flags=re.S)
+    root_string = re.sub(r'(<p[^>]+class="adoc-h2"[^>]*id=")([^"]+)("[^>]*>\s*)(.*?)(<\/p>)', '\n++++\n\n[[\\2]]\n=== \\4\n\n++++\n', root_string, flags=re.S)
+    root_string = re.sub(r'(<p[^>]+class="adoc-h3"[^>]*id=")([^"]+)("[^>]*>\s*)(.*?)(<\/p>)', '\n++++\n\n[[\\2]]\n==== \\4\n\n++++\n', root_string, flags=re.S)
     root_string = root_string + "\n++++\n"
   except Exception as e:
     exc_type, exc_obj, exc_tb = sys.exc_info()
@@ -488,7 +494,7 @@ def make_adoc(root_string, title_text, filename):
 
 def decrease_heading_levels(adoc):
   try:
-    adoc = re.sub("\n==", "\n=", adoc, flags=re.S)
+    adoc = re.sub(r"\n==", "\n=", adoc, flags=re.S)
   except Exception as e:
     exc_type, exc_obj, exc_tb = sys.exc_info()
     print("ERROR: ", e, exc_tb.tb_lineno)
@@ -521,29 +527,29 @@ def parse_header(header_path):
   try:
     with open(header_path) as h:
       content = h.read()
-    blocks = re.findall("^(\s*)(\*|\/\*\*)(\s*)(\s)(\*)(\s)(\\\\)(defgroup)([^}]*)(\@\})", content, re.M)
+    blocks = re.findall(r"^(\s*)(\*|\/\*\*)(\s*)(\s)(\*)(\s)(\\)(defgroup)([^}]*)(\@\})", content, re.M)
     for (a, b, c, d, e, f, g, h, i, j) in blocks:
-      items = i.split("\defgroup")
+      items = i.split(r"\defgroup")
       group_id = None
       for item in items:
         if group_id is None: # must be the first item in the list
-          m = re.match("(\s*)(\S*)(\s*)([^*]*)(.*?)(@\{)", item, re.S)
+          m = re.match(r"(\s*)(\S*)(\s*)([^*]*)(.*?)(@\{)", item, re.S)
           group_id = m.group(2)
           group_filename = "group_"+group_id+".html"
           group_filename = re.sub("_", "__", group_filename)
           group_name = m.group(4)
-          group_name = re.sub("\s*$", "", group_name, re.M)
+          group_name = re.sub(r"\s*$", "", group_name, re.M)
           group_desc = m.group(5)
-          group_desc = re.sub("\n", "", group_desc, re.M)
-          group_desc = re.sub("\*", "", group_desc, re.M)
-          group_desc = re.sub("^\s", "", group_desc, re.M)
+          group_desc = re.sub(r"\n", "", group_desc, re.M)
+          group_desc = re.sub(r"\*", "", group_desc, re.M)
+          group_desc = re.sub(r"^\s", "", group_desc, re.M)
           group_json = { 'group_id': group_id, 'name': group_name, 'description': group_desc, 'html': group_filename, 'subitems': [] }
           h_json.append(group_json)
         else:
           cleaned = item
-          cleaned = re.sub("\n*", "", cleaned, re.M)
-          cleaned = re.sub("^\s*", "", cleaned, re.M)
-          cleaned = re.sub("\s*\*\s*$", "", cleaned, re.M)
+          cleaned = re.sub(r"\n*", "", cleaned, re.M)
+          cleaned = re.sub(r"^\s*", "", cleaned, re.M)
+          cleaned = re.sub(r"\s*\*\s*$", "", cleaned, re.M)
           val = cleaned.split(" ")[0]
           filename = re.sub("_", "__", val)
           filename = "group__" + filename
@@ -705,12 +711,12 @@ def parse_individual_file(html_path, html_file, complete_json_mappings, updated_
     # read the input root
     with open(this_path) as h:
       html_content = h.read()
-      html_content = re.sub('<\!DOCTYPE html PUBLIC "-\/\/W3C\/\/DTD XHTML 1\.0 Transitional\/\/EN" "https:\/\/www\.w3\.org\/TR\/xhtml1\/DTD\/xhtml1-transitional\.dtd">', '', html_content)
+      html_content = re.sub(r'<\!DOCTYPE html PUBLIC "-\/\/W3C\/\/DTD XHTML 1\.0 Transitional\/\/EN" "https:\/\/www\.w3\.org\/TR\/xhtml1\/DTD\/xhtml1-transitional\.dtd">', '', html_content)
       html_content = re.sub('rel="stylesheet">', 'rel="stylesheet"/>', html_content)
       html_content = re.sub('&display=swap"', '"', html_content)
-      html_content = re.sub('<img src="logo-mobile\.svg" alt="Raspberry Pi">', '', html_content)
-      html_content = re.sub('<img src="logo\.svg" alt="Raspberry Pi">', '', html_content)
-      html_content = re.sub("<\!-- HTML header for doxygen \S*?-->", '', html_content)
+      html_content = re.sub(r'<img src="logo-mobile\.svg" alt="Raspberry Pi">', '', html_content)
+      html_content = re.sub(r'<img src="logo\.svg" alt="Raspberry Pi">', '', html_content)
+      html_content = re.sub(r"<\!-- HTML header for doxygen \S*?-->", '', html_content)
       html_content = re.sub(' xmlns="http://www.w3.org/1999/xhtml"', '', html_content)
       root = etree.HTML(html_content)
     
diff --git a/tests/run_documentation_tests.sh b/tests/run_documentation_tests.sh
@@ -0,0 +1,10 @@
+#! /bin/bash
+set -e  # stop at the first failing command; otherwise only the last suite's exit status is returned to the caller (e.g. the CI step)
+# run from the top level: ./tests/run_documentation_tests.sh
+
+python3 -m unittest tests.test_create_build_adoc_doxygen
+python3 -m unittest tests.test_create_build_adoc_include
+python3 -m unittest tests.test_create_build_adoc
+python3 -m unittest tests.test_create_nav
+cd scripts/
+python3 -m unittest tests.test_doxygen_adoc

Original file line number	Diff line number	Diff line change
`@@ -1,2 +1,2 @@`
`1`	`1`	`pyyaml == 6.0.1`
`2`		`-lxml == 4.9.3`
	`2`	`+lxml`