- Now using MarkupSafe for HTML escaping,

i.e. in place of cgi.escape(). Faster C-based implementation and also escapes single quotes for additional security. Supports the __html__ attribute for the given expression as well. When using "disable_unicode" mode, a pure Python HTML escaper function is used which also quotes single quotes. Note that Pylons by default doesn't use Mako's filter - check your environment.py file.
sqlalchemy · Jun 22, 2010 · a0354c3 · a0354c3
1 parent c387465
commit a0354c3
Show file tree

Hide file tree

Showing 9 changed files with 77 additions and 17 deletions.
diff --git a/CHANGES b/CHANGES
@@ -1,4 +1,19 @@
 0.3.4
+- Now using MarkupSafe for HTML escaping,
+  i.e. in place of cgi.escape().  Faster
+  C-based implementation and also escapes
+  single quotes for additional security.
+  Supports the __html__ attribute for
+  the given expression as well.
+
+  When using "disable_unicode" mode,
+  a pure Python HTML escaper function
+  is used which also quotes single quotes.
+
+  Note that Pylons by default doesn't 
+  use Mako's filter - check your 
+  environment.py file.
+
 - Fixed call to "unicode.strip" in 
   exceptions.text_error_template which
   is not Py3k compatible.  [ticket:137]

diff --git a/doc/build/content/filtering.txt b/doc/build/content/filtering.txt
@@ -12,7 +12,7 @@ The above expression applies URL escaping to the expression, and produces `this+
 The built-in escape flags are:
 
 * `u` : URL escaping, provided by `urllib.quote_plus(string.encode('utf-8'))`
-* `h` : HTML escaping, provided by `cgi.escape(string, True)`
+* `h` : HTML escaping, provided by `markupsafe.escape(string)`  (new as of 0.3.4 - prior versions use `cgi.escape(string, True)`)
 * `x` : XML escaping
 * `trim` : whitespace trimming, provided by `string.strip()`
 * `entity` : produces HTML entity references for applicable strings, derived from `htmlentitydefs`

diff --git a/doc/build/content/unicode.txt b/doc/build/content/unicode.txt
@@ -141,6 +141,8 @@ Where above that the string literal used within `context.write` is a regular byt
 
 When `disable_unicode=True` is turned on, the `default_filters` argument which normally defaults to `["unicode"]` now defaults to `["str"]` instead.  Setting default_filters to the empty list `[]` can remove the overhead of the `str` call.  Also, in this mode you **cannot** safely call `render_unicode()` - you'll get unicode/decode errors.
 
+The `h` filter (html escape) uses a less performant pure Python escape function in non-unicode mode (note that in versions prior to 0.3.4, it used cgi.escape(), which has been replaced with a function that also escapes single quotes).  This is because MarkupSafe only supports Python unicode objects for non-ascii strings.
+
 **Rules for using disable_unicode=True**
 
  * don't use this mode unless you really, really want to and you absolutely understand what you're doing

diff --git a/mako/codegen.py b/mako/codegen.py
@@ -20,7 +20,8 @@ def compile(node,
                 buffer_filters=None, 
                 imports=None, 
                 source_encoding=None, 
-                generate_magic_comment=True):
+                generate_magic_comment=True,
+                disable_unicode=False):
 
     """Generate module source code given a parsetree node, 
       uri, and optional source filename"""
@@ -43,7 +44,8 @@ def compile(node,
                                             buffer_filters,
                                             imports, 
                                             source_encoding,
-                                            generate_magic_comment), 
+                                            generate_magic_comment,
+                                            disable_unicode), 
                                 node)
     return buf.getvalue()
 
@@ -55,14 +57,16 @@ def __init__(self,
                     buffer_filters, 
                     imports, 
                     source_encoding, 
-                    generate_magic_comment):
+                    generate_magic_comment,
+                    disable_unicode):
         self.uri = uri
         self.filename = filename
         self.default_filters = default_filters
         self.buffer_filters = buffer_filters
         self.imports = imports
         self.source_encoding = source_encoding
         self.generate_magic_comment = generate_magic_comment
+        self.disable_unicode = disable_unicode
 
 class _GenerateRenderMethod(object):
     """A template visitor object which generates the 
@@ -586,6 +590,8 @@ def create_filter_callable(self, args, target, is_expression):
         def locate_encode(name):
             if re.match(r'decode\..+', name):
                 return "filters." + name
+            elif self.compiler.disable_unicode:
+                return filters.NON_UNICODE_ESCAPES.get(name, name)
             else:
                 return filters.DEFAULT_ESCAPES.get(name, name)
 

diff --git a/mako/filters.py b/mako/filters.py
@@ -5,9 +5,10 @@
 # the MIT License: http://www.opensource.org/licenses/mit-license.php
 
 
-import re, cgi, urllib, htmlentitydefs, codecs
+import re, urllib, htmlentitydefs, codecs
 from StringIO import StringIO
 from mako import util
+import markupsafe
 
 xml_escapes = {
     '&' : '&amp;',
@@ -16,12 +17,18 @@
     '"' : '&#34;',   # also &quot; in html-only
     "'" : '&#39;'    # also &apos; in html-only    
 }
+
 # XXX: &quot; is valid in HTML and XML
 #      &apos; is not valid HTML, but is valid XML
 
 def html_escape(string):
-    return cgi.escape(string, True)
+    return markupsafe.escape(string)
+
+def legacy_html_escape(string):
+    """legacy HTML escape for non-unicode mode."""
 
+    return re.sub(r'([&<"\'>])', lambda m: xml_escapes[m.group()], string)
+
 def xml_escape(string):
     return re.sub(r'([&<"\'>])', lambda m: xml_escapes[m.group()], string)
 
@@ -173,3 +180,6 @@ def htmlentityreplace_errors(ex):
         'unicode':'str'
     })
 
+NON_UNICODE_ESCAPES = DEFAULT_ESCAPES.copy()
+NON_UNICODE_ESCAPES['h'] = 'filters.legacy_html_escape'
+
diff --git a/mako/template.py b/mako/template.py
@@ -363,7 +363,8 @@ def _compile_text(template, text, filename):
                             buffer_filters=template.buffer_filters, 
                             imports=template.imports, 
                             source_encoding=lexer.encoding,
-                            generate_magic_comment=template.disable_unicode)
+                            generate_magic_comment=template.disable_unicode,
+                            disable_unicode=template.disable_unicode)
 
     cid = identifier
     if not util.py3k and isinstance(cid, unicode):
@@ -389,7 +390,8 @@ def _compile_module_file(template, text, filename, outputpath):
                                 buffer_filters=template.buffer_filters,
                                 imports=template.imports,
                                 source_encoding=lexer.encoding,
-                                generate_magic_comment=True)
+                                generate_magic_comment=True,
+                                disable_unicode=template.disable_unicode)
 
     # make tempfiles in the same location as the ultimate 
     # location.   this ensures they're on the same filesystem,

diff --git a/setup.py b/setup.py
@@ -48,6 +48,7 @@
       zip_safe=False,
       install_requires=[
           'Beaker>=1.1',
+          'MarkupSafe>=0.9.2',
       ],
       entry_points="""
       [python.templating.engines]

diff --git a/test/test_exceptions.py b/test/test_exceptions.py
@@ -81,10 +81,10 @@ def test_utf8_html_error_template(self):
                     html_error.decode('utf-8')
 
             if util.py3k:
-                assert u"3 ${'привет'}".encode(sys.getdefaultencoding(),
+                assert u"3 ${&#39;привет&#39;}".encode(sys.getdefaultencoding(),
                                             'htmlentityreplace') in html_error
             else:
-                assert u"3 ${u'привет'}".encode(sys.getdefaultencoding(),
+                assert u"3 ${u&#39;привет&#39;}".encode(sys.getdefaultencoding(),
                                             'htmlentityreplace') in html_error
         else:
             assert False, ("This function should trigger a CompileException, "
@@ -108,10 +108,10 @@ def test_py_utf8_html_error_template(self):
             html_error = exceptions.html_error_template().render()
             if util.py3k:
                 assert 'RuntimeError: test' in html_error.decode('utf-8')
-                assert u"foo = '日本'" in html_error.decode('utf-8')
+                assert u"foo = &#39;日本&#39;" in html_error.decode('utf-8')
             else:
                 assert 'RuntimeError: test' in html_error
-                assert "foo = u'&#x65E5;&#x672C;'" in html_error
+                assert "foo = u&#39;&#x65E5;&#x672C;&#39;" in html_error
 
     def test_py_unicode_error_html_error_template(self):
         try:
@@ -146,11 +146,11 @@ def test_utf8_format_exceptions(self):
             l.put_string("foo.html", """# -*- coding: utf-8 -*-\n${u'привет' + foobar}""")
 
         if util.py3k:
-            assert u'<div class="sourceline">${\'привет\' + foobar}</div>'\
+            assert u'<div class="sourceline">${&#39;привет&#39; + foobar}</div>'\
                 in result_lines(l.get_template("foo.html").render().decode('utf-8'))
         else:
-            assert '<div class="highlight">2 ${u\'&#x43F;&#x440;'\
-                    '&#x438;&#x432;&#x435;&#x442;\' + foobar}</div>' \
+            assert '<div class="highlight">2 ${u&#39;&#x43F;&#x440;'\
+                    '&#x438;&#x432;&#x435;&#x442;&#39; + foobar}</div>' \
                 in result_lines(l.get_template("foo.html").render().decode('utf-8'))
 
 
@@ -169,7 +169,7 @@ def test_custom_tback(self):
 
         # obfuscate the text so that this text
         # isn't in the 'wrong' exception
-        assert "".join(reversed(")'rab'(oof")) in html_error
+        assert "".join(reversed(");93#&rab;93#&(oof")) in html_error
 
     def test_tback_no_trace(self):
         try:

diff --git a/test/test_filters.py b/test/test_filters.py
@@ -2,9 +2,11 @@
 
 from mako.template import Template
 import unittest
+from mako import util
+from test import TemplateTest, eq_, skip_if
 from util import result_lines, flatten_result
 
-class FilterTest(unittest.TestCase):
+class FilterTest(TemplateTest):
     def test_basic(self):
         t = Template("""
         ${x | myfilter}
@@ -26,7 +28,29 @@ def test_convert_str(self):
             ${x | trim}
         """)
         assert flatten_result(t.render(x=5)) == "5"
+
+    def test_quoting(self):
+        t = Template("""
+            foo ${bar | h}
+        """)
+
+        eq_(
+            flatten_result(t.render(bar="<'some bar'>")),
+            "foo &lt;&#39;some bar&#39;&gt;"
+        )
+
+    @skip_if(lambda: util.py3k)
+    def test_quoting_non_unicode(self):
+        t = Template("""
+            foo ${bar | h}
+        """, disable_unicode=True)
 
+        eq_(
+            flatten_result(t.render(bar="<'привет'>")),
+            "foo &lt;&#39;привет&#39;&gt;"
+        )
+
+
     def test_def(self):
         t = Template("""
             <%def name="foo()" filter="myfilter">