Permalink
Browse files

Fix the XML encoding declaration regex and add a test for it

  • Loading branch information...
1 parent 5b0b3d6 commit cbfedcd89ab2d2274a7d1266633ea7887524d76c @scoder scoder committed Mar 29, 2013
Showing with 23 additions and 2 deletions.
  1. +2 −2 src/lxml/apihelpers.pxi
  2. +21 −0 src/lxml/tests/test_xslt.py
View
@@ -577,7 +577,7 @@ cdef list _collectAttributes(xmlNode* c_node, int collecttype):
cdef object __RE_XML_ENCODING
__RE_XML_ENCODING = re.compile(
- ur'^(\s*<\?\s*xml[^>]+)\s+encoding\s*=\s*["\'][^"\']*["\']\s*', re.U)
+ ur'^(<\?xml[^>]+)\s+encoding\s*=\s*["\'][^"\']*["\'](\s*\?>|)', re.U)
cdef object __REPLACE_XML_ENCODING
__REPLACE_XML_ENCODING = __RE_XML_ENCODING.sub
@@ -587,7 +587,7 @@ __HAS_XML_ENCODING = __RE_XML_ENCODING.match
cdef object _stripEncodingDeclaration(object xml_string):
# this is a hack to remove the XML encoding declaration from unicode
- return __REPLACE_XML_ENCODING(ur'\g<1> ', xml_string)
+ return __REPLACE_XML_ENCODING(ur'\g<1>\g<2>', xml_string)
cdef bint _hasEncodingDeclaration(object xml_string):
# check if a (unicode) string has an XML encoding declaration
@@ -198,6 +198,27 @@ def test_xslt_unicode(self):
self.assertEqual(expected,
unicode(res))
+ def test_xslt_unicode_standalone(self):
+ tree = self.parse(_bytes('<a><b>\\uF8D2</b><c>\\uF8D2</c></a>'
+ ).decode("unicode_escape"))
+ style = self.parse('''\
+<xsl:stylesheet version="1.0"
+ xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+ <xsl:output encoding="UTF-16" standalone="no"/>
+ <xsl:template match="/">
+ <foo><xsl:value-of select="/a/b/text()" /></foo>
+ </xsl:template>
+</xsl:stylesheet>''')
+
+ st = etree.XSLT(style)
+ res = st(tree)
+ expected = _bytes('''\
+<?xml version="1.0" standalone="no"?>
+<foo>\\uF8D2</foo>
+''').decode("unicode_escape")
+ self.assertEqual(expected,
+ unicode(res))
+
def test_xslt_input(self):
style = self.parse('''\
<xsl:stylesheet version="1.0"

0 comments on commit cbfedcd

Please sign in to comment.