Skip to content

Commit

Permalink
- Now using MarkupSafe for HTML escaping,
Browse files Browse the repository at this point in the history
  i.e. in place of cgi.escape().  Faster
  C-based implementation and also escapes
  single quotes for additional security.
  Supports the __html__ attribute for
  the given expression as well.

  When using "disable_unicode" mode,
  a pure Python HTML escaper function
  is used which also quotes single quotes.

  Note that Pylons by default doesn't
  use Mako's filter - check your
  environment.py file.
  • Loading branch information
zzzeek committed Jun 22, 2010
1 parent c387465 commit a0354c3
Show file tree
Hide file tree
Showing 9 changed files with 77 additions and 17 deletions.
15 changes: 15 additions & 0 deletions CHANGES
Original file line number Diff line number Diff line change
@@ -1,4 +1,19 @@
0.3.4
- Now using MarkupSafe for HTML escaping,
i.e. in place of cgi.escape(). Faster
C-based implementation and also escapes
single quotes for additional security.
Supports the __html__ attribute for
the given expression as well.

When using "disable_unicode" mode,
a pure Python HTML escaper function
is used which also quotes single quotes.

Note that Pylons by default doesn't
use Mako's filter - check your
environment.py file.

- Fixed call to "unicode.strip" in
exceptions.text_error_template which
is not Py3k compatible. [ticket:137]
Expand Down
2 changes: 1 addition & 1 deletion doc/build/content/filtering.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ The above expression applies URL escaping to the expression, and produces `this+
The built-in escape flags are:

* `u` : URL escaping, provided by `urllib.quote_plus(string.encode('utf-8'))`
* `h` : HTML escaping, provided by `cgi.escape(string, True)`
* `h` : HTML escaping, provided by `markupsafe.escape(string)` (new as of 0.3.4 - prior versions use `cgi.escape(string, True)`)
* `x` : XML escaping
* `trim` : whitespace trimming, provided by `string.strip()`
* `entity` : produces HTML entity references for applicable strings, derived from `htmlentitydefs`
Expand Down
2 changes: 2 additions & 0 deletions doc/build/content/unicode.txt
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,8 @@ Where above that the string literal used within `context.write` is a regular byt

When `disable_unicode=True` is turned on, the `default_filters` argument which normally defaults to `["unicode"]` now defaults to `["str"]` instead. Setting default_filters to the empty list `[]` can remove the overhead of the `str` call. Also, in this mode you **cannot** safely call `render_unicode()` - you'll get unicode/decode errors.

The `h` filter (html escape) uses a less performant pure Python escape function in non-unicode mode (note that in versions prior to 0.3.4, it used `cgi.escape()`, which has been replaced with a function that also escapes single quotes). This is because MarkupSafe only supports Python unicode objects for non-ascii strings.

**Rules for using disable_unicode=True**

* don't use this mode unless you really, really want to and you absolutely understand what you're doing
Expand Down
12 changes: 9 additions & 3 deletions mako/codegen.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ def compile(node,
buffer_filters=None,
imports=None,
source_encoding=None,
generate_magic_comment=True):
generate_magic_comment=True,
disable_unicode=False):

"""Generate module source code given a parsetree node,
uri, and optional source filename"""
Expand All @@ -43,7 +44,8 @@ def compile(node,
buffer_filters,
imports,
source_encoding,
generate_magic_comment),
generate_magic_comment,
disable_unicode),
node)
return buf.getvalue()

Expand All @@ -55,14 +57,16 @@ def __init__(self,
buffer_filters,
imports,
source_encoding,
generate_magic_comment):
generate_magic_comment,
disable_unicode):
self.uri = uri
self.filename = filename
self.default_filters = default_filters
self.buffer_filters = buffer_filters
self.imports = imports
self.source_encoding = source_encoding
self.generate_magic_comment = generate_magic_comment
self.disable_unicode = disable_unicode

class _GenerateRenderMethod(object):
"""A template visitor object which generates the
Expand Down Expand Up @@ -586,6 +590,8 @@ def create_filter_callable(self, args, target, is_expression):
def locate_encode(name):
    # Map a filter name used in a template to the expression that is
    # written into the generated module.
    #
    # Names of the form "decode.<something>" are always resolved on the
    # ``filters`` module directly.
    if re.match(r'decode\..+', name):
        return "filters." + name

    # Otherwise choose the escape table according to the compiler's
    # disable_unicode flag; a name missing from the table is returned
    # unchanged.
    if self.compiler.disable_unicode:
        escape_table = filters.NON_UNICODE_ESCAPES
    else:
        escape_table = filters.DEFAULT_ESCAPES
    return escape_table.get(name, name)

Expand Down
14 changes: 12 additions & 2 deletions mako/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@
# the MIT License: http://www.opensource.org/licenses/mit-license.php


import re, cgi, urllib, htmlentitydefs, codecs
import re, urllib, htmlentitydefs, codecs
from StringIO import StringIO
from mako import util
import markupsafe

xml_escapes = {
'&' : '&amp;',
Expand All @@ -16,12 +17,18 @@
'"' : '&#34;', # also &quot; in html-only
"'" : '&#39;' # also &apos; in html-only
}

# XXX: &quot; is valid in HTML and XML
# &apos; is not valid HTML, but is valid XML

def html_escape(string):
return cgi.escape(string, True)
return markupsafe.escape(string)

def legacy_html_escape(string):
    """Legacy HTML escape for non-unicode mode.

    Every ``&``, ``<``, ``"``, ``'`` and ``>`` found in ``string`` is
    replaced by the value stored for that character in the module-level
    ``xml_escapes`` table; the substituted string is returned.
    """

    def _to_entity(match):
        # The pattern matches exactly one character, which is the table key.
        return xml_escapes[match.group()]

    return re.sub(r'([&<"\'>])', _to_entity, string)

def xml_escape(string):
    """Return ``string`` with XML-significant characters escaped.

    The characters ``&``, ``<``, ``"``, ``'`` and ``>`` are each looked
    up in the module-level ``xml_escapes`` table and replaced by the
    value found there.
    """
    special_chars = r'([&<"\'>])'

    def _substitute(found):
        return xml_escapes[found.group()]

    return re.sub(special_chars, _substitute, string)

Expand Down Expand Up @@ -173,3 +180,6 @@ def htmlentityreplace_errors(ex):
'unicode':'str'
})

NON_UNICODE_ESCAPES = DEFAULT_ESCAPES.copy()
NON_UNICODE_ESCAPES['h'] = 'filters.legacy_html_escape'

6 changes: 4 additions & 2 deletions mako/template.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,7 +363,8 @@ def _compile_text(template, text, filename):
buffer_filters=template.buffer_filters,
imports=template.imports,
source_encoding=lexer.encoding,
generate_magic_comment=template.disable_unicode)
generate_magic_comment=template.disable_unicode,
disable_unicode=template.disable_unicode)

cid = identifier
if not util.py3k and isinstance(cid, unicode):
Expand All @@ -389,7 +390,8 @@ def _compile_module_file(template, text, filename, outputpath):
buffer_filters=template.buffer_filters,
imports=template.imports,
source_encoding=lexer.encoding,
generate_magic_comment=True)
generate_magic_comment=True,
disable_unicode=template.disable_unicode)

# make tempfiles in the same location as the ultimate
# location. this ensures they're on the same filesystem,
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
zip_safe=False,
install_requires=[
'Beaker>=1.1',
'MarkupSafe>=0.9.2',
],
entry_points="""
[python.templating.engines]
Expand Down
16 changes: 8 additions & 8 deletions test/test_exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,10 +81,10 @@ def test_utf8_html_error_template(self):
html_error.decode('utf-8')

if util.py3k:
assert u"3 ${'привет'}".encode(sys.getdefaultencoding(),
assert u"3 ${&#39;привет&#39;}".encode(sys.getdefaultencoding(),
'htmlentityreplace') in html_error
else:
assert u"3 ${u'привет'}".encode(sys.getdefaultencoding(),
assert u"3 ${u&#39;привет&#39;}".encode(sys.getdefaultencoding(),
'htmlentityreplace') in html_error
else:
assert False, ("This function should trigger a CompileException, "
Expand All @@ -108,10 +108,10 @@ def test_py_utf8_html_error_template(self):
html_error = exceptions.html_error_template().render()
if util.py3k:
assert 'RuntimeError: test' in html_error.decode('utf-8')
assert u"foo = '日本'" in html_error.decode('utf-8')
assert u"foo = &#39;日本&#39;" in html_error.decode('utf-8')
else:
assert 'RuntimeError: test' in html_error
assert "foo = u'&#x65E5;&#x672C;'" in html_error
assert "foo = u&#39;&#x65E5;&#x672C;&#39;" in html_error

def test_py_unicode_error_html_error_template(self):
try:
Expand Down Expand Up @@ -146,11 +146,11 @@ def test_utf8_format_exceptions(self):
l.put_string("foo.html", """# -*- coding: utf-8 -*-\n${u'привет' + foobar}""")

if util.py3k:
assert u'<div class="sourceline">${\'привет\' + foobar}</div>'\
assert u'<div class="sourceline">${&#39;привет&#39; + foobar}</div>'\
in result_lines(l.get_template("foo.html").render().decode('utf-8'))
else:
assert '<div class="highlight">2 ${u\'&#x43F;&#x440;'\
'&#x438;&#x432;&#x435;&#x442;\' + foobar}</div>' \
assert '<div class="highlight">2 ${u&#39;&#x43F;&#x440;'\
'&#x438;&#x432;&#x435;&#x442;&#39; + foobar}</div>' \
in result_lines(l.get_template("foo.html").render().decode('utf-8'))


Expand All @@ -169,7 +169,7 @@ def test_custom_tback(self):

# obfuscate the text so that this text
# isn't in the 'wrong' exception
assert "".join(reversed(")'rab'(oof")) in html_error
assert "".join(reversed(");93#&rab;93#&(oof")) in html_error

def test_tback_no_trace(self):
try:
Expand Down
26 changes: 25 additions & 1 deletion test/test_filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@

from mako.template import Template
import unittest
from mako import util
from test import TemplateTest, eq_, skip_if
from util import result_lines, flatten_result

class FilterTest(unittest.TestCase):
class FilterTest(TemplateTest):
def test_basic(self):
t = Template("""
${x | myfilter}
Expand All @@ -26,7 +28,29 @@ def test_convert_str(self):
${x | trim}
""")
assert flatten_result(t.render(x=5)) == "5"

def test_quoting(self):
# Render a template that pipes ``bar`` through the ``h`` filter.
t = Template("""
foo ${bar | h}
""")

# The angle brackets and the single quotes in the input are expected
# to come back as entity references (&lt; &gt; &#39;).
eq_(
flatten_result(t.render(bar="<'some bar'>")),
"foo &lt;&#39;some bar&#39;&gt;"
)

# NOTE(review): skip_if is given a predicate on util.py3k; presumably the
# test is skipped when running under Python 3 - confirm against test.skip_if.
@skip_if(lambda: util.py3k)
def test_quoting_non_unicode(self):
# Build the template with disable_unicode=True and pipe ``bar``
# through the ``h`` filter.
t = Template("""
foo ${bar | h}
""", disable_unicode=True)

# The input mixes markup characters with non-ASCII text; the expected
# output has the angle brackets and single quotes replaced by entity
# references while the non-ASCII text is unchanged.
eq_(
flatten_result(t.render(bar="<'привет'>")),
"foo &lt;&#39;привет&#39;&gt;"
)


def test_def(self):
t = Template("""
<%def name="foo()" filter="myfilter">
Expand Down

0 comments on commit a0354c3

Please sign in to comment.