Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Fetching contributors…

Cannot retrieve contributors at this time

file 166 lines (140 sloc) 5.444 kb
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166
#!/usr/bin/env python

import sys, os, os.path, re, codecs

# Relative paths of the two files whose generated sections get rewritten.
BUILD_SOURCE_FILE = os.path.join("src", "lxml", "xmlerror.pxi")
BUILD_DEF_FILE = os.path.join("src", "lxml", "includes", "xmlerror.pxd")

# Print usage and stop when no directory was given or help was requested.
if len(sys.argv) < 2 or sys.argv[1].lower() in ('-h', '--help'):
    print("This script generates the constants in file %s" % BUILD_SOURCE_FILE)
    print("Call as")
    # A single formatted string prints the same text on Python 2 and 3.
    print("%s /path/to/libxml2-doc-dir" % sys.argv[0])
    # Explicitly asking for help is a success (status 0); being called
    # without the required argument is a usage error (status 1).
    sys.exit(0 if len(sys.argv) > 1 else 1)

# The HTML files are read from the 'html' subdirectory of the given path.
HTML_DIR = os.path.join(sys.argv[1], 'html')
os.stat(HTML_DIR) # raise an error if we can't find it

sys.path.insert(0, 'src')
from lxml import etree

# For every libxml2 enum that gets exported: the name of the Python variable
# that receives its constants, and the name prefix that is stripped from
# each constant when it is written to the .pxi file.
ENUM_MAP = dict(
    xmlErrorLevel=('__ERROR_LEVELS', 'XML_ERR_'),
    xmlErrorDomain=('__ERROR_DOMAINS', 'XML_FROM_'),
    xmlParserErrors=('__PARSER_ERROR_TYPES', 'XML_'),
    # xmlXPathError=('__XPATH_ERROR_TYPES', ''),
    # xmlSchemaValidError=('__XMLSCHEMA_ERROR_TYPES', 'XML_'),
    xmlRelaxNGValidErr=('__RELAXNG_ERROR_TYPES', 'XML_'),
)

# Order in which the enums are emitted into the generated sections.
ENUM_ORDER = (
    'xmlErrorLevel',
    'xmlErrorDomain',
    'xmlParserErrors',
    # 'xmlXPathError',
    # 'xmlSchemaValidError',
    'xmlRelaxNGValidErr',
)

# Notice written at the top of each generated section; names this script.
COMMENT = (
    "\n"
    "# This section is generated by the script '%s'.\n"
    "\n"
) % os.path.basename(sys.argv[0])

def split(lines):
    """Split the lines of a source file around its generated section.

    The generated section is delimited by two comment lines: one starting
    with '#' and containing "BEGIN: GENERATED CONSTANTS", and a later one
    starting with '#' and containing "END: GENERATED CONSTANTS".

    :param lines: iterable of lines (each normally ending in a newline).
    :returns: a ``(pre, post)`` pair of lists.  ``pre`` holds everything up
        to and including the BEGIN marker followed by an empty string;
        ``post`` holds an empty string, the END marker, all remaining lines
        and a final empty string.  The lines between the two markers are
        dropped.
    :raises ValueError: if either marker is missing.  Without this check a
        file lacking a marker would be silently truncated or get its last
        line duplicated when written back.
    """
    lines = iter(lines)

    pre = []
    for line in lines:
        pre.append(line)
        if line.startswith('#') and "BEGIN: GENERATED CONSTANTS" in line:
            break
    else:
        raise ValueError("missing '# BEGIN: GENERATED CONSTANTS' marker line")
    pre.append('')

    # Skip the old generated content; the same iterator continues right
    # after the BEGIN marker.
    for line in lines:
        if line.startswith('#') and "END: GENERATED CONSTANTS" in line:
            break
    else:
        raise ValueError("missing '# END: GENERATED CONSTANTS' marker line")

    post = ['', line]
    post.extend(lines)
    post.append('')
    return pre, post

def regenerate_file(filename, result):
    """Replace the generated section of ``filename`` with ``result``.

    The file is read as UTF-8 and divided by ``split()`` into the part
    before and the part after the generated section.  It is then rewritten
    as: leading part, the ``COMMENT`` notice, the ``result`` lines joined
    with newlines, trailing part.

    :param filename: path of the source file to rewrite in place.
    :param result: list of lines (without line endings) forming the new
        generated section.
    """
    # Read everything before reopening for writing; the 'with' blocks make
    # sure the handles are closed even if splitting or writing fails.
    with codecs.open(filename, 'r', encoding="utf-8") as f:
        pre, post = split(f)

    with codecs.open(filename, 'w', encoding="utf-8") as f:
        f.write(''.join(pre))
        f.write(COMMENT)
        f.write('\n'.join(result))
        f.write(''.join(post))

# Selects the XHTML <pre class="programlisting"> elements whose text
# contains 'Enum'.
find_enums = etree.XPath(
    "//html:pre[@class = 'programlisting' and contains(text(), 'Enum')]",
    namespaces=dict(html='http://www.w3.org/1999/xhtml'),
)

# Evaluates to the string value of the node it is applied to.
collect_text = etree.XPath("string()")

def parse_enums(html_dir, html_filename, enum_dict):
    """Collect enum definitions from one HTML documentation file.

    Every listing found by ``find_enums`` whose text starts with
    ``enum <name> {`` and whose name is a key of ``ENUM_MAP`` is parsed.
    For each child element of the listing, the element text is taken as
    the constant name and the text following the element must look like
    ``= <number>`` optionally followed by ``: <description>``.

    :param html_dir: directory containing the HTML file.
    :param html_filename: name of the HTML file inside ``html_dir``.
    :param enum_dict: dict that is updated in place, mapping the enum name
        to a list of ``(name, int_value, description_or_None)`` tuples.
        An enum with a field that cannot be parsed is reported and left
        out entirely.
    :returns: the same ``enum_dict`` object.
    """
    # Raw strings keep '\s' and '\w' from being read as (invalid) string
    # escapes; the compiled patterns are unchanged.
    PARSE_ENUM_NAME = re.compile(r'\s*enum\s+(\w+)\s*{', re.I).match
    PARSE_ENUM_VALUE = re.compile(r'\s*=\s+([0-9]+)\s*(?::\s*(.*))?').match
    tree = etree.parse(os.path.join(html_dir, html_filename))
    for enum in find_enums(tree):
        enum_name = PARSE_ENUM_NAME(collect_text(enum))
        if not enum_name:
            continue
        enum_name = enum_name.group(1)
        if enum_name not in ENUM_MAP:
            continue
        print("Found enum", enum_name)
        entries = []
        for child in enum:
            name = child.text
            # A child without trailing text has tail None; treat it as an
            # unparsable field instead of failing inside the regex match.
            match = PARSE_ENUM_VALUE(child.tail or '')
            if not match:
                print("Ignoring enum %s (failed to parse field '%s')" % (
                        enum_name, name))
                break
            value, descr = match.groups()
            entries.append((name, int(value), descr))
        else:
            # Only store the enum when every field was parsed.
            enum_dict[enum_name] = entries
    return enum_dict

# Gather the enum definitions from the libxml2 HTML documentation.
enum_dict = {}
for html_name in (
        'libxml-xmlerror.html',
        # 'libxml-xpath.html',
        # 'libxml-xmlschemas.html',
        'libxml-relaxng.html'):
    parse_enums(HTML_DIR, html_name, enum_dict)

# Build the lines of the two generated sections.
pxd_result = ['cdef extern from "libxml/xmlerror.h":']
pxi_result = ['''\
# Constants are stored in tuples of strings, for which Cython generates very
# efficient setup code. To parse them, iterate over the tuples and parse each
# line in each string independently. Tuples of strings (instead of a plain
# string) are required as some C-compilers of a certain well-known OS vendor
# cannot handle strings that are a few thousand bytes in length.
''']

enum_indent = ' ' * 4
member_indent = enum_indent * 2

for enum_name in ENUM_ORDER:
    members = enum_dict[enum_name]
    pxi_name, prefix = ENUM_MAP[enum_name]

    pxd_result.append('%sctypedef enum %s:' % (enum_indent, enum_name))
    pxi_result.append('cdef object %s = (u"""\\' % pxi_name)

    # Running size of the current string literal; each string ends with
    # '\n\0', hence the starting value of 2.
    chunk_size = 2
    for full_name, value, description in members:
        # .pxd: aligned declaration, with the description as a trailing
        # comment unless it is missing or merely repeats the value.
        pxd_line = '%-50s = %7d' % (full_name, value)
        if description and description != str(value):
            pxd_line += ' # %s' % description
        pxd_result.append(member_indent + pxd_line)

        # .pxi: compact NAME=VALUE with the enum's common prefix removed,
        # unless the name consists of nothing but the prefix.
        short_name = full_name
        if full_name.startswith(prefix) and len(full_name) > len(prefix):
            short_name = full_name[len(prefix):]
        pxi_line = '%s=%d' % (short_name, value)

        # max string length in MSVC is 2048: close the current string and
        # open a new one before getting too close to it.
        if chunk_size + len(pxi_line) >= 2040:
            pxi_result.extend(('""",', 'u"""\\'))
            chunk_size = 2
        pxi_result.append(pxi_line)
        chunk_size += len(pxi_line) + 2 # + '\n\0'

    pxd_result.append('')
    pxi_result.extend(('""",)', ''))

# Write the generated sections back into the .pxi and .pxd files.
for target_file, generated_lines in (
        (BUILD_SOURCE_FILE, pxi_result),
        (BUILD_DEF_FILE, pxd_result)):
    print("Updating file %s" % target_file)
    regenerate_file(target_file, generated_lines)

print("Done")
Something went wrong with that request. Please try again.