Skip to content

Commit

Permalink
Implement file not found for XIncludes (#462)
Browse files Browse the repository at this point in the history
* Implement file not found for XIncludes in script libexec/daps-xmlwellformed
* Add option -W/--warnings-as-errors to turn any warnings
  about missing files into errors
* Integrate the new xinclude.xsl stylesheet, which handles manual
  resolution of XInclude elements
* Introduce two new Python extension functions: s:exists and s:abspath.
  Both are basically mapped to os.path.exists and os.path.abspath
* This code supports lxml >= 3.4.0
  This works on docserv (Leap 42.3) now
  • Loading branch information
tomschr committed Jul 24, 2018
1 parent 92b1177 commit e80ff3d
Show file tree
Hide file tree
Showing 2 changed files with 166 additions and 11 deletions.
132 changes: 121 additions & 11 deletions libexec/daps-xmlwellformed
Original file line number Diff line number Diff line change
@@ -1,28 +1,121 @@
#!/usr/bin/env python3
#
# This script was needed as xmllint cannot ignore double xml:ids
# This script is needed as xmllint cannot ignore double IDs
# lxml
#
#
"""Performs a well-formedness check on XML.
* Does XInclude processing before any checks;
* Ignores non-unique IDs (attribute xml:id's)
* Does XInclude processing before any checks;
* Warns about files which cannot be found
* Ignores non-unique IDs (attributes xml:id or id)
"""

__author__ = "Thomas Schraitle"
__version__ = "0.2.0"

import argparse
import os
import sys
import textwrap
from lxml import etree

# Directory that contains this script (symbolic links resolved); the
# XInclude stylesheet is looked up right next to it.
HERE = os.path.split(os.path.realpath(__file__))[0]
XINCLUDE_XSLT = os.path.join(HERE, "xinclude.xsl")
# Namespace URI under which the Python extension functions are registered.
SUSE_NS = "urn:x-suse:ns:python"


# Start-up checks: stop with a dedicated exit code when the installed
# lxml is too old (10) or the stylesheet is missing (125).
if etree.LXML_VERSION < (3, 4, 0):
    print("ERROR: I need a minimum version of 3.4.0 of lxml.",
          file=sys.stderr)
    raise SystemExit(10)

if not os.path.exists(XINCLUDE_XSLT):
    print("ERROR: Missing file %s" % XINCLUDE_XSLT, file=sys.stderr)
    raise SystemExit(125)

# ------------------------------------------------------------
# Extension Functions in a SUSE namespace


def exists(context, f):
    """Test whether a path referenced from the current document exists.

    XPath extension function. A relative path is resolved against the
    directory of the document that contains the context node, not against
    the current working directory.

    :param context: XPath evaluation context; its ``context_node`` is used
        to find the URL of the surrounding document
    :param f: the path name, either as a node-set (a list, of which only
        the first item is used) or as a plain string
    :return: True=Path exists, False otherwise (also for broken symbolic
        links and for an empty node-set)
    :rtype: bool
    """
    if not isinstance(f, str):
        # Node-set argument such as ``@href``; an empty list means the
        # attribute is missing, so there is nothing that could exist.
        if not f:
            return False
        f = f[0]
    # The document URL may be None when the source is unknown; in that
    # case the path is resolved against the current working directory.
    base = context.context_node.getroottree().docinfo.URL or ""
    return os.path.exists(os.path.join(os.path.dirname(base), f))


def abspath(context, f):
    """Return the absolute path of a file referenced from the current document.

    XPath extension function. A relative path is resolved against the
    directory of the document that contains the context node, not against
    the current working directory.

    :param context: XPath evaluation context; its ``context_node`` is used
        to find the URL of the surrounding document
    :param f: the path name, either as a node-set (a list, of which only
        the first item is used) or as a plain string
    :return: absolute, normalized path; an empty string for an empty
        node-set
    :rtype: str
    """
    if not isinstance(f, str):
        # Node-set argument such as ``@href``; an empty list means the
        # attribute is missing, so there is no path to resolve.
        if not f:
            return ""
        f = f[0]
    # The document URL may be None when the source is unknown; in that
    # case the path is resolved against the current working directory.
    base = context.context_node.getroottree().docinfo.URL or ""
    return os.path.abspath(os.path.join(os.path.dirname(base), f))


# ------------------------------------------------------------
#

def check_wellformedness(xmlfile, xinclude=True):
def process_xinclude(tree):
    """Process the tree with an XSLT stylesheet which resolves any XIncludes.

    The stylesheet (XINCLUDE_XSLT) reports via messages of the form
    "LEVEL:text"; every message with the level "WARN" names a referenced
    file which cannot be found.

    :param tree: the ElementTree
    :return: the files which cannot be found (empty if there are none)
    :rtype: list
    """
    # This notation is needed for lxml <v4:
    ns = etree.FunctionNamespace(SUSE_NS)
    ns['exists'] = exists
    ns['abspath'] = abspath

    # Let's use XSLT to handle XIncludes manually; this is needed
    # for two reasons:
    # 1. lxml seems to have a bug when resolving XIncludes on the
    #    second level
    # 2. we need to handle cases where the file cannot be found
    xitransform = etree.XSLT(etree.parse(XINCLUDE_XSLT))
    warnfiles = []
    try:
        # Only the messages are of interest, not the transformation result
        xitransform(tree)
    except etree.XSLTApplyError as err:
        # We search for "Cannot resolve URI <FILENAME>"
        uri = err.args[0].split("URI ")[-1]
        warnfiles.append(uri)
    else:
        for entry in xitransform.error_log:
            # partition() instead of split(): a log message without a colon
            # must be skipped, not abort the run with a ValueError
            level, sep, msg = entry.message.partition(':')
            if sep and level == 'WARN':
                warnfiles.append(msg)

    return warnfiles


def check_wellformedness(xmlfile, warnings_as_errors=False, xinclude=True):
"""Checks a file for well-formedness
This only works with lxml >= 3.4.0 (because of collect_ids option)
Expand All @@ -38,13 +131,16 @@ def check_wellformedness(xmlfile, xinclude=True):
try:
tree = etree.parse(xmlfile, parser=xmlparser)
if xinclude:
tree.xinclude()
# HACK for lxml < v4.2.1:
# This test is needed to perform XInclude resolution on
# second and third levels:
if list(tree.iter("{http://www.w3.org/2001/XInclude}include")):
tree.xinclude()
r = process_xinclude(tree)
if r and warnings_as_errors:
raise ValueError(", ".join(r))
return 0

except ValueError as err:
print("ERROR: The following file(s) cannot be found:\n "
"%s" % err, file=sys.stderr)
return 20

except (etree.XMLSyntaxError, etree.XIncludeError) as err:
print("ERROR: %s" % err, file=sys.stderr)
print(textwrap.indent(str(err.error_log), prefix=" "), file=sys.stderr)
Expand All @@ -61,11 +157,23 @@ def parse_cli(args=None):
parser = argparse.ArgumentParser(description=__doc__.split("\n", 1)[0],
epilog=__doc__.split("\n", 1)[-1],
)
parser.add_argument("--version",
action="version",
version="%(prog)s {}".format(__version__),
)
parser.add_argument("--xinclude",
action="store_true",
default=False,
help="Do XInclude processing"
)
parser.add_argument("-W", "--warnings-as-errors",
action="store_true",
default=False,
help=("Flag to set the behaviour when "
"referenced files with XIncludes cannot "
"be found; "
"(default: %(default)s)")
)
parser.add_argument("xmlfile",
help="XML file to check well-formedness"
)
Expand All @@ -76,5 +184,7 @@ def parse_cli(args=None):

# Script entry point: parse the command line, run the check once and
# use its return value as the exit code.
if __name__ == "__main__":
    args = parse_cli()
    # Keyword arguments keep the two boolean flags from being mixed up;
    # the second positional parameter is warnings_as_errors, not xinclude.
    result = check_wellformedness(args.xmlfile,
                                  warnings_as_errors=args.warnings_as_errors,
                                  xinclude=args.xinclude)
    sys.exit(result)
45 changes: 45 additions & 0 deletions libexec/xinclude.xsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Purpose:
Process xi:include elements and return warning message if file
cannot be found
Parameters:
n/a
Input:
DocBook 4/5 document
Output:
nothing, only INFO or WARNING messages
Dependencies:
s:abspath and s:exists, two Python extension functions provided by
the daps-xmlwellformed script
Author: Thomas Schraitle
Copyright (C) 2018 SUSE Linux GmbH
-->
<xsl:stylesheet version="1.0"
xmlns:xi="http://www.w3.org/2001/XInclude"
xmlns:s="urn:x-suse:ns:python"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">

<xsl:output method="text"/>
<!-- Suppress every text node; the stylesheet writes no text of its own -->
<xsl:template match="text()"/>

<!-- For each xi:include: if s:exists reports the referenced file, emit an
     INFO message and continue inside that document; otherwise emit a
     WARN message that carries the href value -->
<xsl:template match="xi:include">
<xsl:variable name="abspath" select="s:abspath(@href)"/>
<xsl:choose>
<xsl:when test="s:exists(@href)">
<xsl:message>INFO: XIncluding <xsl:value-of select="$abspath"/></xsl:message>
<xsl:apply-templates select="document($abspath, .)"/>
</xsl:when>
<xsl:otherwise>
<xsl:message>WARN:<xsl:value-of select="@href"/></xsl:message>
</xsl:otherwise>
</xsl:choose>
</xsl:template>

</xsl:stylesheet>

0 comments on commit e80ff3d

Please sign in to comment.