diff --git a/CHANGES.rst b/CHANGES.rst index c044d9a5..b845075e 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -3,7 +3,15 @@ Changes In next release ... -- +- An XML document provided as a string (i.e. decoded) now correctly + has its content encoding parsed. + +- Boolean attributes are now automatically configured for templates in + non-XML mode, presuming that we're being used to generate HTML. + + This means that the same loading mechanism can be used for both XML- + and HTML-based templates. + 4.1.0 (2023-08-29) ------------------ diff --git a/src/chameleon/template.py b/src/chameleon/template.py index 615b4802..d32e316a 100644 --- a/src/chameleon/template.py +++ b/src/chameleon/template.py @@ -19,10 +19,12 @@ from .utils import DebuggingOutputStream from .utils import Scope from .utils import create_formatted_exception +from .utils import detect_encoding from .utils import join from .utils import mangle from .utils import raise_with_traceback from .utils import read_bytes +from .utils import read_xml_encoding from .utils import value_repr @@ -87,6 +89,7 @@ class BaseTemplate: """ default_encoding = "utf-8" + default_content_type = None # This attribute is strictly informational in this template class # and is used in exception formatting. It may be set on @@ -224,11 +227,15 @@ def write(self, body): body, encoding, content_type = read_bytes( body, self.default_encoding ) + elif body.startswith(' - - + + diff --git a/src/chameleon/tests/outputs/071.pt b/src/chameleon/tests/outputs/071.pt index fb58cff4..dbf2d155 100644 --- a/src/chameleon/tests/outputs/071.pt +++ b/src/chameleon/tests/outputs/071.pt @@ -1,7 +1,7 @@ - - + + diff --git a/src/chameleon/tests/test_bools_plus_sniffing.py b/src/chameleon/tests/test_bools_plus_sniffing.py new file mode 100644 index 00000000..cecdc15a --- /dev/null +++ b/src/chameleon/tests/test_bools_plus_sniffing.py @@ -0,0 +1,380 @@ +import difflib +import unittest + +from chameleon import PageTemplate + + +xml_bytes = b"""\ + + + +""" + +xml_w_enc_bytes = b"""\ + + + +""" + +html5_bytes = b"""\ + + + + Title of document + + +
+ + +
+ + +""" + +html5_w_ct_n_enc_bytes = b"""\ + + + + + Title of document + + +
+ + +
+ + +""" + + +class BaseTestCase(unittest.TestCase): + def get_template(self, text): + template = PageTemplate(text) + return template + + def get_template_bytes(self): + return self.get_template(self.input_bytes) + + def get_template_str(self): + return self.get_template(self.input_bytes.decode('utf-8')) + + def assert_same(self, s1, s2): + L1 = s1.splitlines() + L1 = list(filter(None, [' '.join(x.split()).strip() for x in L1])) + L2 = s2.splitlines() + L2 = list(filter(None, [' '.join(x.split()).strip() for x in L2])) + diff = '\n'.join(list(difflib.unified_diff(L1, L2))) + assert diff == '', diff + + +class XMLTestCase(BaseTestCase): + + input_bytes = xml_bytes + encoding = None + + def test_bytes_content_type(self): + template = self.get_template_bytes() + self.assertEqual(template.content_type, 'text/xml') + + def test_bytes_encoding(self): + template = self.get_template_bytes() + self.assertEqual(template.content_encoding, 'utf-8') + + def test_str_content_type(self): + template = self.get_template_str() + self.assertEqual(template.content_type, 'text/xml') + + def test_str_encoding(self): + template = self.get_template_str() + self.assertEqual(template.content_encoding, self.encoding) + + def test_bytes_checked_true(self): + template = self.get_template_bytes() + expected = """ + + + + """ + result = template(checked=True) + self.assert_same(expected, result) + + def test_bytes_checked_false(self): + template = self.get_template_bytes() + expected = """ + + + + """ + result = template(checked=False) + self.assert_same(expected, result) + + def test_bytes_checked_None(self): + template = self.get_template_bytes() + expected = """ + + + + """ + result = template(checked=None) + self.assert_same(expected, result) + + def test_bytes_checked_default(self): + template = self.get_template_bytes() + expected = """ + + + + """ + result = template(checked=template.default_marker.value) + self.assert_same(expected, result) + + def test_str_checked_true(self): + template = self.get_template_str() + expected = """ + + + + """ + result = template(checked=True) + self.assert_same(expected, result) + + def test_str_checked_false(self): + template = self.get_template_str() + expected = """ + + + + """ + result = template(checked=False) + self.assert_same(expected, result) + + def test_str_checked_None(self): + template = self.get_template_str() + expected = """ + + + + """ + result = template(checked=None) + self.assert_same(expected, result) + + def test_str_checked_default(self): + template = self.get_template_str() + expected = """ + + + + """ + result = template(checked=template.default_marker.value) + self.assert_same(expected, result) + + +class XMLWithEncodingTestCase(BaseTestCase): + + input_bytes = xml_w_enc_bytes + encoding = 'ascii' + + def test_bytes_encoding(self): + template = self.get_template_bytes() + self.assertEqual(template.content_encoding, self.encoding) + + def test_str_encoding(self): + template = self.get_template_str() + self.assertEqual(template.content_encoding, self.encoding) + + +class HTML5TestCase(BaseTestCase): + + input_bytes = html5_bytes + + def test_bytes_content_type(self): + template = self.get_template_bytes() + self.assertEqual(template.content_type, 'text/html') + + def test_bytes_encoding(self): + template = self.get_template_bytes() + self.assertEqual(template.content_encoding, 'utf-8') + + def test_str_content_type(self): + template = self.get_template_str() + self.assertEqual(template.content_type, 'text/html') + + def test_str_encoding(self): + template = self.get_template_str() + self.assertEqual(template.content_encoding, 'utf-8') + + def test_bytes_checked_true(self): + template = self.get_template_bytes() + expected = """ + + + + Title of document + + +
+ + +
+ + + """ + result = template(checked=True) + self.assert_same(expected, result) + + def test_bytes_checked_false(self): + template = self.get_template_bytes() + expected = """ + + + + Title of document + + +
+ + +
+ + + """ + result = template(checked=False) + self.assert_same(expected, result) + + def test_bytes_checked_None(self): + template = self.get_template_bytes() + expected = """ + + + + Title of document + + +
+ + +
+ + + """ + result = template(checked=None) + self.assert_same(expected, result) + + def test_bytes_checked_default(self): + template = self.get_template_bytes() + expected = """ + + + + Title of document + + +
+ + +
+ + + """ + result = template(checked=template.default_marker.value) + self.assert_same(expected, result) + + def test_str_checked_true(self): + template = self.get_template_str() + expected = """ + + + + Title of document + + +
+ + +
+ + + """ + result = template(checked=True) + self.assert_same(expected, result) + + def test_str_checked_false(self): + template = self.get_template_str() + expected = """ + + + + Title of document + + +
+ + +
+ + + """ + result = template(checked=False) + self.assert_same(expected, result) + + def test_str_checked_None(self): + template = self.get_template_str() + expected = """ + + + + Title of document + + +
+ + +
+ + + """ + result = template(checked=None) + self.assert_same(expected, result) + + def test_str_checked_default(self): + template = self.get_template_str() + expected = """ + + + + Title of document + + +
+ + +
+ + + """ + result = template(checked=template.default_marker.value) + self.assert_same(expected, result) + + +class HTML5WithContentTypeAndEncodingTestCase(BaseTestCase): + + input_bytes = html5_w_ct_n_enc_bytes + + def test_bytes_content_type(self): + template = self.get_template_bytes() + self.assertEqual(template.content_type, 'foo/bar') + + def test_bytes_encoding(self): + template = self.get_template_bytes() + self.assertEqual(template.content_encoding, 'utf-8') + + def test_str_content_type(self): + template = self.get_template_str() + self.assertEqual(template.content_type, 'foo/bar') + + def test_str_encoding(self): + template = self.get_template_str() + self.assertEqual(template.content_encoding, 'utf-8') diff --git a/src/chameleon/tests/test_templates.py b/src/chameleon/tests/test_templates.py index 84d3faa8..5de98792 100644 --- a/src/chameleon/tests/test_templates.py +++ b/src/chameleon/tests/test_templates.py @@ -576,18 +576,20 @@ def test_boolean_attributes(self): '', '', '', + '', # noqa: E501 line too long + '', '', '', '', '', # noqa: E501 line too long - )), - boolean_attributes={ - 'checked'}) + )) + ) self.assertEqual( template( dynamic_true={"checked": True}, - dynamic_false={"checked": False} + dynamic_false={"checked": False}, + dynamic_marker=template.default_marker.value, ), "\n".join(( '', @@ -597,6 +599,8 @@ def test_boolean_attributes(self): '', '', '', + '', + '', '', '', '', @@ -659,7 +663,7 @@ def test_windows_line_endings(self): ' class="foo"\r\n' ' tal:content="string:bar"/>') self.assertEqual(template(), - 'bar') def test_digest(self): diff --git a/src/chameleon/utils.py b/src/chameleon/utils.py index 5fbcdee0..75d32750 100644 --- a/src/chameleon/utils.py +++ b/src/chameleon/utils.py @@ -85,7 +85,6 @@ def read_bytes(body, default_encoding): if body.startswith(_xml_decl): content_type = "text/xml" - encoding = read_xml_encoding(body) or default_encoding else: content_type, encoding = detect_encoding(body, default_encoding) diff --git a/src/chameleon/zpt/template.py b/src/chameleon/zpt/template.py index 405e87d1..be91ad44 100644 --- a/src/chameleon/zpt/template.py +++ b/src/chameleon/zpt/template.py @@ -27,6 +27,23 @@ bytes = str +BOOLEAN_HTML_ATTRIBUTES = [ + # From http://www.w3.org/TR/xhtml1/#guidelines (C.10) + "compact", + "nowrap", + "ismap", + "declare", + "noshade", + "checked", + "disabled", + "readonly", + "multiple", + "selected", + "noresize", + "defer", +] + + class PageTemplate(BaseTemplate): """Constructor for the page template language. @@ -73,6 +90,10 @@ class PageTemplate(BaseTemplate): The special return value ``default`` drops or inserts the attribute based on the value element attribute value. + The default setting is to autodetect if we're in HTML-mode and + provide the standard set of boolean attributes for this + document type. + ``translate`` Use this option to set a translation function. @@ -171,12 +192,13 @@ def translate(msgid, domain=None, mapping=None, default=None, } default_expression = 'python' + default_content_type = 'text/html' translate = staticmethod(simple_translate) encoding = None - boolean_attributes = set() + boolean_attributes = None mode = "xml" @@ -222,11 +244,22 @@ def expression_parser(self): return ExpressionParser(self.expression_types, self.default_expression) def parse(self, body): + boolean_attributes = self.boolean_attributes + + if self.content_type != 'text/xml': + if boolean_attributes is None: + boolean_attributes = BOOLEAN_HTML_ATTRIBUTES + + # In non-XML mode, we support various platform-specific + # line endings and convert them to the UNIX newline + # character. + body = body.replace('\r\n', '\n').replace('\r', '\n') + return MacroProgram( body, self.mode, self.filename, escape=True if self.mode == "xml" else False, default_marker=self.default_marker, - boolean_attributes=self.boolean_attributes, + boolean_attributes=boolean_attributes or frozenset([]), implicit_i18n_translate=self.implicit_i18n_translate, implicit_i18n_attributes=self.implicit_i18n_attributes, trim_attribute_space=self.trim_attribute_space,