Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement file not found for XIncludes #462

Merged
merged 2 commits into from
Jul 24, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
132 changes: 121 additions & 11 deletions libexec/daps-xmlwellformed
Original file line number Diff line number Diff line change
@@ -1,28 +1,121 @@
#!/usr/bin/env python3
#
# This script was needed as xmllint cannot ignore double xml:ids
# This script is needed as xmllint cannot ignore double IDs
# lxml
#
#
"""Performs a well-formedness check on XML.

* Does XInclude processing before any checks;
* Ignores non-unique IDs (attribute xml:id's)
* Does XInclude processing before any checks;
* Warns about files which cannot be found
* Ignores non-unique IDs (attributes xml:id or id)
"""

__author__ = "Thomas Schraitle"
__version__ = "0.2.0"

import argparse
import os
import sys
import textwrap
from lxml import etree

# Directory of this script; the XInclude stylesheet is expected next to it.
HERE = os.path.dirname(os.path.realpath(__file__))
XINCLUDE_XSLT = os.path.join(HERE, "xinclude.xsl")
# Namespace under which the Python extension functions are registered
SUSE_NS = "urn:x-suse:ns:python"


# Refuse to run with an lxml that is older than 3.4.0
if not etree.LXML_VERSION >= (3, 4, 0):
    print("ERROR: I need a minimum version of 3.4.0 of lxml.", file=sys.stderr)
    sys.exit(10)

# Without the stylesheet no XInclude processing is possible
if os.path.exists(XINCLUDE_XSLT) is False:
    print("ERROR: Missing file %s" % XINCLUDE_XSLT, file=sys.stderr)
    sys.exit(125)

# ------------------------------------------------------------
# Extension Functions in a SUSE namespace


def exists(context, f):
    """Test whether a path exists. Returns False for broken symbolic links.

    The path is resolved relative to the directory of the document
    which contains the context node.

    :param context: the XPath evaluation context; its ``context_node``
                    is used to find the URL of the current document
    :param f: list of path names (however, we are only interested
              in the first item) or a plain string
    :return: True=Path exists, False otherwise (also when no path
             was passed at all)
    :rtype: bool
    """
    if not f:
        # Empty node-set, e.g. an xi:include without @href: there is
        # nothing that could exist.
        return False
    href = f if isinstance(f, str) else f[0]
    # The URL is None when the source of the document is unknown; fall
    # back to a path relative to the current working directory.
    base = context.context_node.getroottree().docinfo.URL or ""
    return os.path.exists(os.path.join(os.path.dirname(base), href))


def abspath(context, f):
    """Return the absolute path of a file referenced from the current document.

    The path is resolved relative to the directory of the document
    which contains the context node.

    :param context: the XPath evaluation context; its ``context_node``
                    is used to find the URL of the current document
    :param f: list of path names (however, we are only interested
              in the first item) or a plain string
    :return: absolute path, or an empty string when no path was passed
    :rtype: str
    """
    if not f:
        # Empty node-set, e.g. an xi:include without @href
        return ""
    href = f if isinstance(f, str) else f[0]
    # The URL is None when the source of the document is unknown; fall
    # back to a path relative to the current working directory.
    base = context.context_node.getroottree().docinfo.URL or ""
    return os.path.abspath(os.path.join(os.path.dirname(base), href))


# ------------------------------------------------------------
#

def check_wellformedness(xmlfile, xinclude=True):
def process_xinclude(tree):
    """Process the tree with a XSLT stylesheet which resolves any XIncludes.

    The stylesheet emits a ``WARN:<href>`` message for each referenced
    file which cannot be found; these are collected from the error log
    of the transformation.

    :param tree: the ElementTree
    :return: the references (href or URI) which could not be resolved;
             empty if everything was found
    :rtype: list
    """
    # This notation is needed for lxml <v4:
    ns = etree.FunctionNamespace(SUSE_NS)
    ns['exists'] = exists
    ns['abspath'] = abspath

    # Let's use XSLT to handle XIncludes manually; this is needed
    # for two reasons:
    # 1. lxml seems to have a bug when resolving XIncludes on the
    #    second level
    # 2. we need to handle cases where the file cannot be found
    xitransform = etree.XSLT(etree.parse(XINCLUDE_XSLT))
    warnfiles = []
    try:
        # Only the messages are of interest, not the result tree
        xitransform(tree)
    except etree.XSLTApplyError as err:
        # We search for "Cannot resolve URI <FILENAME>"
        uri = err.args[0].split("URI ")[-1]
        warnfiles.append(uri)
    else:
        for entry in xitransform.error_log:
            # partition() never fails on messages without a colon; a
            # failing tuple unpacking would raise ValueError, which the
            # caller reports as a missing file.
            level, _, msg = entry.message.partition(':')
            if level == 'WARN':
                warnfiles.append(msg)

    return warnfiles


def check_wellformedness(xmlfile, warnings_as_errors=False, xinclude=True):
"""Checks a file for well-formedness

This only works with lxml >= 3.4.0 (because of collect_ids option)
Expand All @@ -38,13 +131,16 @@ def check_wellformedness(xmlfile, xinclude=True):
try:
tree = etree.parse(xmlfile, parser=xmlparser)
if xinclude:
tree.xinclude()
# HACK for lxml < v4.2.1:
# This test is needed to perform XInclude resolution on
# second and third levels:
if list(tree.iter("{http://www.w3.org/2001/XInclude}include")):
tree.xinclude()
r = process_xinclude(tree)
if r and warnings_as_errors:
raise ValueError(", ".join(r))
return 0

except ValueError as err:
print("ERROR: The following file(s) cannot be found:\n "
"%s" % err, file=sys.stderr)
return 20

except (etree.XMLSyntaxError, etree.XIncludeError) as err:
print("ERROR: %s" % err, file=sys.stderr)
print(textwrap.indent(str(err.error_log), prefix=" "), file=sys.stderr)
Expand All @@ -61,11 +157,23 @@ def parse_cli(args=None):
parser = argparse.ArgumentParser(description=__doc__.split("\n", 1)[0],
epilog=__doc__.split("\n", 1)[-1],
)
parser.add_argument("--version",
action="version",
version="%(prog)s {}".format(__version__),
)
parser.add_argument("--xinclude",
action="store_true",
default=False,
help="Do XInclude processing"
)
parser.add_argument("-W", "--warnings-as-errors",
action="store_true",
default=False,
help=("Flag to set the behaviour when "
"referenced files with XIncludes cannot "
"be found; "
"(default: %(default)s)")
)
parser.add_argument("xmlfile",
help="XML file to check well-formedness"
)
Expand All @@ -76,5 +184,7 @@ def parse_cli(args=None):

if __name__ == "__main__":
args = parse_cli()
result = check_wellformedness(args.xmlfile, args.xinclude)
result = check_wellformedness(args.xmlfile,
args.warnings_as_errors,
args.xinclude)
sys.exit(result)
45 changes: 45 additions & 0 deletions libexec/xinclude.xsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Purpose:
Process xi:include elements and return warning message if file
cannot be found

Parameters:
n/a

Input:
DocBook 4/5 document

Output:
nothing, only INFO or WARNING messages

Dependencies:
s:abspath and s:exists, two Python extension functions used by
the daps-xmlwellformed script

Author: Thomas Schraitle
Copyright (C) 2018 SUSE Linux GmbH

-->
<xsl:stylesheet version="1.0"
  xmlns:xi="http://www.w3.org/2001/XInclude"
  xmlns:s="urn:x-suse:ns:python"
  xmlns:xsl="http://www.w3.org/1999/XSL/Transform">

  <!-- No regular output is wanted; all text nodes are dropped -->
  <xsl:output method="text"/>
  <xsl:template match="text()"/>

  <!--
    Report an xi:include whose target is missing, otherwise continue
    with the referenced document. s:abspath and s:exists are the Python
    extension functions registered by daps-xmlwellformed.
  -->
  <xsl:template match="xi:include">
    <xsl:variable name="abspath" select="s:abspath(@href)"/>
    <xsl:choose>
      <xsl:when test="not(s:exists(@href))">
        <xsl:message>WARN:<xsl:value-of select="@href"/></xsl:message>
      </xsl:when>
      <xsl:otherwise>
        <xsl:message>INFO: XIncluding <xsl:value-of select="$abspath"/></xsl:message>
        <xsl:apply-templates select="document($abspath, .)"/>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:template>

</xsl:stylesheet>