Skip to content

Commit

Permalink
Parse libxml2 error constants from libxml2-api.xml instead of the HTML sources to avoid having to generate the documentation.
Browse files Browse the repository at this point in the history

Also avoid actually writing the output files if there are no changes, to avoid useless rebuilds.
  • Loading branch information
scoder committed Feb 18, 2022
1 parent 064ff1f commit 9660889
Showing 1 changed file with 109 additions and 106 deletions.
215 changes: 109 additions & 106 deletions update-error-constants.py
Expand Up @@ -2,23 +2,14 @@

from __future__ import print_function, absolute_import

import sys, os, os.path, re, codecs
import operator
import os.path
import sys
import xml.etree.ElementTree as ET

BUILD_SOURCE_FILE = os.path.join("src", "lxml", "xmlerror.pxi")
BUILD_DEF_FILE = os.path.join("src", "lxml", "includes", "xmlerror.pxd")

if len(sys.argv) < 2 or sys.argv[1].lower() in ('-h', '--help'):
print("This script generates the constants in file %s" % BUILD_SOURCE_FILE)
print("Call as")
print(sys.argv[0], "/path/to/libxml2-doc-dir")
sys.exit(len(sys.argv) > 1)

HTML_DIR = os.path.join(sys.argv[1], 'html')
os.stat(HTML_DIR) # raise an error if we can't find it

sys.path.insert(0, 'src')
from lxml import etree

# map enum name to Python variable name and alignment for constant name
ENUM_MAP = {
'xmlErrorLevel' : ('__ERROR_LEVELS', 'XML_ERR_'),
Expand All @@ -42,6 +33,7 @@
""" % os.path.basename(sys.argv[0])


def split(lines):
lines = iter(lines)
pre = []
Expand All @@ -50,108 +42,119 @@ def split(lines):
if line.startswith('#') and "BEGIN: GENERATED CONSTANTS" in line:
break
pre.append('')
old = []
for line in lines:
if line.startswith('#') and "END: GENERATED CONSTANTS" in line:
break
old.append(line.rstrip('\n'))
post = ['', line]
post.extend(lines)
post.append('')
return pre, post
return pre, old, post


def regenerate_file(filename, result):
new = COMMENT + '\n'.join(result)

# read .pxi source file
f = codecs.open(filename, 'r', encoding="utf-8")
pre, post = split(f)
f.close()
with open(filename, 'r', encoding="utf-8") as f:
pre, old, post = split(f)

if new.strip() == '\n'.join(old).strip():
# no changes
return False

# write .pxi source file
f = codecs.open(filename, 'w', encoding="utf-8")
f.write(''.join(pre))
f.write(COMMENT)
f.write('\n'.join(result))
f.write(''.join(post))
f.close()

collect_text = etree.XPath("string()")
find_enums = etree.XPath(
"//html:pre[@class = 'programlisting' and contains(text(), 'Enum')]",
namespaces = {'html' : 'http://www.w3.org/1999/xhtml'})

def parse_enums(html_dir, html_filename, enum_dict):
PARSE_ENUM_NAME = re.compile(r'\s*enum\s+(\w+)\s*{', re.I).match
PARSE_ENUM_VALUE = re.compile(r'\s*=\s+([0-9]+)\s*(?::\s*(.*))?').match
tree = etree.parse(os.path.join(html_dir, html_filename))
enums = find_enums(tree)
for enum in enums:
enum_name = PARSE_ENUM_NAME(collect_text(enum))
if not enum_name:
continue
enum_name = enum_name.group(1)
if enum_name not in ENUM_MAP:
with open(filename, 'w', encoding="utf-8") as f:
f.write(''.join(pre))
f.write(new)
f.write(''.join(post))

return True


def parse_enums(doc_dir, api_filename, enum_dict):
tree = ET.parse(os.path.join(doc_dir, api_filename))
for enum in tree.iterfind('symbols/enum'):
enum_type = enum.get('type')
if enum_type not in ENUM_MAP:
continue
print("Found enum", enum_name)
entries = []
for child in enum:
name = child.text
match = PARSE_ENUM_VALUE(child.tail)
if not match:
print("Ignoring enum %s (failed to parse field '%s')" % (
enum_name, name))
break
value, descr = match.groups()
entries.append((name, int(value), descr))
else:
enum_dict[enum_name] = entries
return enum_dict

enum_dict = {}
parse_enums(HTML_DIR, 'libxml-xmlerror.html', enum_dict)
#parse_enums(HTML_DIR, 'libxml-xpath.html', enum_dict)
#parse_enums(HTML_DIR, 'libxml-xmlschemas.html', enum_dict)
parse_enums(HTML_DIR, 'libxml-relaxng.html', enum_dict)

# regenerate source files
pxi_result = []
append_pxi = pxi_result.append
pxd_result = []
append_pxd = pxd_result.append

append_pxd('cdef extern from "libxml/xmlerror.h":')

ctypedef_indent = ' '*4
constant_indent = ctypedef_indent*2

for enum_name in ENUM_ORDER:
constants = enum_dict[enum_name]
pxi_name, prefix = ENUM_MAP[enum_name]

append_pxd(ctypedef_indent + 'ctypedef enum %s:' % enum_name)
append_pxi('cdef object %s = """\\' % pxi_name)

prefix_len = len(prefix)
length = 2 # each string ends with '\n\0'
for name, val, descr in constants:
if descr and descr != str(val):
line = '%-50s = %7d # %s' % (name, val, descr)
else:
line = '%-50s = %7d' % (name, val)
append_pxd(constant_indent + line)

if name[:prefix_len] == prefix and len(name) > prefix_len:
name = name[prefix_len:]
line = '%s=%d' % (name, val)
append_pxi(line)
length += len(line) + 2 # + '\n\0'

append_pxd('')
append_pxi('"""')
append_pxi('')

# write source files
print("Updating file %s" % BUILD_SOURCE_FILE)
regenerate_file(BUILD_SOURCE_FILE, pxi_result)

print("Updating file %s" % BUILD_DEF_FILE)
regenerate_file(BUILD_DEF_FILE, pxd_result)

print("Done")
entries = enum_dict.get(enum_type)
if not entries:
print("Found enum", enum_type)
entries = enum_dict[enum_type] = []
entries.append((
enum.get('name'),
int(enum.get('value')),
enum.get('info', '').strip(),
))


def main(doc_dir):
    """Regenerate the generated error-constant sections of the lxml sources.

    Collects the enum values listed in ``libxml2-api.xml`` inside *doc_dir*
    via ``parse_enums()``, renders one ``ctypedef enum`` per entry of
    ``ENUM_ORDER`` for ``BUILD_DEF_FILE`` and one ``cdef object`` string per
    entry for ``BUILD_SOURCE_FILE``, then hands both to ``regenerate_file()``.

    Raises ``KeyError`` if an enum named in ``ENUM_ORDER`` was not found in
    the API description.
    """
    enum_dict = {}
    # libxml2-api.xml is the single input; it replaces the per-module HTML
    # documentation pages that earlier versions of this script parsed.
    parse_enums(doc_dir, 'libxml2-api.xml', enum_dict)

    # Lines of the generated sections, without the surrounding file content.
    pxi_result = []
    pxd_result = ['cdef extern from "libxml/xmlerror.h":']

    ctypedef_indent = ' ' * 4
    constant_indent = ctypedef_indent * 2

    for enum_name in ENUM_ORDER:
        # Emit constants ordered by numeric value (stable for equal values).
        constants = sorted(enum_dict[enum_name], key=operator.itemgetter(1))
        pxi_name, prefix = ENUM_MAP[enum_name]

        pxd_result.append(ctypedef_indent + 'ctypedef enum %s:' % enum_name)
        pxi_result.append('cdef object %s = """\\' % pxi_name)

        prefix_len = len(prefix)
        for name, val, descr in constants:
            # Only add a trailing comment when the description says more
            # than the numeric value itself.
            if descr and descr != str(val):
                line = '%-50s = %7d # %s' % (name, val, descr)
            else:
                line = '%-50s = %7d' % (name, val)
            pxd_result.append(constant_indent + line)

            # The .pxi string stores the names without the common enum
            # prefix, unless stripping it would leave an empty name.
            if name[:prefix_len] == prefix and len(name) > prefix_len:
                name = name[prefix_len:]
            pxi_result.append('%s=%d' % (name, val))

        pxd_result.append('')
        pxi_result.append('"""')
        pxi_result.append('')

    # regenerate_file() returns False when the generated section is already
    # up to date, in which case the file is left untouched.
    for filename, result in (
        (BUILD_SOURCE_FILE, pxi_result),
        (BUILD_DEF_FILE, pxd_result),
    ):
        print("Updating file %s" % filename)
        updated = regenerate_file(filename, result)
        if not updated:
            print("No changes.")

    print("Done")


if __name__ == "__main__":
    # Show the usage text when the documentation directory is missing or
    # when help was requested explicitly.
    if len(sys.argv) < 2 or sys.argv[1].lower() in ('-h', '--help'):
        print("This script generates the constants in file %s" % BUILD_SOURCE_FILE)
        print("Call as")
        print(sys.argv[0], "/path/to/libxml2-doc-dir")
        # Inside this branch, having an argument means it was a help flag:
        # that is a successful run (0).  A missing argument is a usage
        # error and must be reported with a non-zero status (1).
        sys.exit(0 if len(sys.argv) > 1 else 1)

    main(sys.argv[1])

0 comments on commit 9660889

Please sign in to comment.