Skip to content

Commit

Permalink
issue 42: "smarty-pants" extra: add \' and \" escapes for dumb quotes
Browse files Browse the repository at this point in the history
(only enabled when this extra is in use)
  • Loading branch information
trentm committed Jun 21, 2010
1 parent 302a7c7 commit 9981d70
Show file tree
Hide file tree
Showing 5 changed files with 41 additions and 23 deletions.
5 changes: 3 additions & 2 deletions CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@
## python-markdown2 v1.0.1.18 (not yet released)

- [Issue 42] Add "smarty-pants" extra for transforming plain ASCII
punctuation characters into smart typographic punctuation HTML entities.
punctuation characters into smart typographic punctuation HTML entities.
Inspiration: <http://daringfireball.net/projects/smartypants/>
Implementation by Nikhil Chelliah.
Implementation by Nikhil Chelliah. Also add `\'` and `\"` escape sequences
for forcing dumb quotes when this extra is in use.
- Guard against using `True` instead of `None` as follows
`markdown(..., extras={'header-ids': True})`. `None` is wanted, but `True`
is commonly (at least I did it twice) used.
Expand Down
47 changes: 26 additions & 21 deletions lib/markdown2.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,11 @@ def __init__(self, html4tags=False, tab_width=4, safe_mode=None,
self.use_file_vars = use_file_vars
self._outdent_re = re.compile(r'^(\t|[ ]{1,%d})' % tab_width, re.M)

self._escape_table = g_escape_table.copy()
if "smarty-pants" in self.extras:
self._escape_table['"'] = _hash_ascii('"')
self._escape_table["'"] = _hash_ascii("'")

def reset(self):
self.urls = {}
self.titles = {}
Expand Down Expand Up @@ -793,8 +798,8 @@ def _escape_special_chars(self, text):
# character with its corresponding MD5 checksum value;
# this is likely overkill, but it should prevent us from
# colliding with the escape values by accident.
escaped.append(token.replace('*', g_escape_table['*'])
.replace('_', g_escape_table['_']))
escaped.append(token.replace('*', self._escape_table['*'])
.replace('_', self._escape_table['_']))
else:
escaped.append(self._encode_backslash_escapes(token))
is_html_markup = not is_html_markup
Expand Down Expand Up @@ -964,12 +969,12 @@ def _do_links(self, text):
url = url[1:-1] # '<url>' -> 'url'
# We've got to encode these to avoid conflicting
# with italics/bold.
url = url.replace('*', g_escape_table['*']) \
.replace('_', g_escape_table['_'])
url = url.replace('*', self._escape_table['*']) \
.replace('_', self._escape_table['_'])
if title:
title_str = ' title="%s"' \
% title.replace('*', g_escape_table['*']) \
.replace('_', g_escape_table['_']) \
% title.replace('*', self._escape_table['*']) \
.replace('_', self._escape_table['_']) \
.replace('"', '&quot;')
else:
title_str = ''
Expand Down Expand Up @@ -1008,12 +1013,12 @@ def _do_links(self, text):
url = self.urls[link_id]
# We've got to encode these to avoid conflicting
# with italics/bold.
url = url.replace('*', g_escape_table['*']) \
.replace('_', g_escape_table['_'])
url = url.replace('*', self._escape_table['*']) \
.replace('_', self._escape_table['_'])
title = self.titles.get(link_id)
if title:
title = title.replace('*', g_escape_table['*']) \
.replace('_', g_escape_table['_'])
title = title.replace('*', self._escape_table['*']) \
.replace('_', self._escape_table['_'])
title_str = ' title="%s"' % title
else:
title_str = ''
Expand Down Expand Up @@ -1404,13 +1409,13 @@ def _encode_code(self, text):
('<', '&lt;'),
('>', '&gt;'),
# Now, escape characters that are magic in Markdown:
('*', g_escape_table['*']),
('_', g_escape_table['_']),
('{', g_escape_table['{']),
('}', g_escape_table['}']),
('[', g_escape_table['[']),
(']', g_escape_table[']']),
('\\', g_escape_table['\\']),
('*', self._escape_table['*']),
('_', self._escape_table['_']),
('{', self._escape_table['{']),
('}', self._escape_table['}']),
('[', self._escape_table['[']),
(']', self._escape_table[']']),
('\\', self._escape_table['\\']),
]
for before, after in replacements:
text = text.replace(before, after)
Expand Down Expand Up @@ -1584,7 +1589,7 @@ def _encode_amps_and_angles(self, text):
return text

def _encode_backslash_escapes(self, text):
for ch, escape in g_escape_table.items():
for ch, escape in self._escape_table.items():
text = text.replace("\\"+ch, escape)
return text

Expand Down Expand Up @@ -1653,8 +1658,8 @@ def _do_link_patterns(self, text):
escaped_href = (
href.replace('"', '&quot;') # b/c of attr quote
# To avoid markdown <em> and <strong>:
.replace('*', g_escape_table['*'])
.replace('_', g_escape_table['_']))
.replace('*', self._escape_table['*'])
.replace('_', self._escape_table['_']))
link = '<a href="%s">%s</a>' % (escaped_href, text[start:end])
hash = _hash_text(link)
link_from_hash[hash] = link
Expand All @@ -1665,7 +1670,7 @@ def _do_link_patterns(self, text):

def _unescape_special_chars(self, text):
# Swap back in all the special characters we've hidden.
for ch, hash in g_escape_table.items():
for ch, hash in self._escape_table.items():
text = text.replace(hash, ch)
return text

Expand Down
5 changes: 5 additions & 0 deletions test/tm-cases/smarty_pants.html
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,16 @@ <h2>&ldquo;Simple substitutions&rdquo;</h2>

<p>And finally&#8230;wait for it, and again with spaces&#8230;I&rsquo;ve tested ellipses.</p>

<h2>Escapes</h2>

<p>Before getting to the hard stuff, I&rsquo;ll run through all the escape sequences &mdash; they shouldn&rsquo;t need to become HTML entities.</p>

<pre><code>\\ \" \' \` \- \. \&gt;
</code></pre>

<p>The &ldquo;smarty-pants&rdquo; extra adds escapes for 'single quotes' and "double
quotes" in case you want to force dumb quotes.</p>

<h2>&ldquo;Quotation marks&rdquo;</h2>

<p>You&rsquo;ll notice that I began this document with a quotation to test a potential error: $ is zero-width and \s is one-width, and you can&rsquo;t have both in a backreference. Meanwhile, I&rsquo;ve this paragraph has tested contractions four times; &rsquo;tis close, but this last apostrophe should fool the regex.</p>
Expand Down
1 change: 1 addition & 0 deletions test/tm-cases/smarty_pants.tags
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
smarty-pants issue42
6 changes: 6 additions & 0 deletions test/tm-cases/smarty_pants.text
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,16 @@ Here I interrupt myself with an en dash -- no, now it's with---an em dash.

And finally...wait for it, and again with spaces. . .I've tested ellipses.

Escapes
-------

Before getting to the hard stuff, I'll run through all the escape sequences --- they shouldn't need to become HTML entities.

\\ \" \' \` \- \. \>

The "smarty-pants" extra adds escapes for \'single quotes\' and \"double
quotes\" in case you want to force dumb quotes.

"Quotation marks"
-----------------

Expand Down

0 comments on commit 9981d70

Please sign in to comment.