issue 42: "smarty-pants" extra: add \' and \" escapes for dumb quotes

(only enabled when this extra is in use)
trentm · Jun 21, 2010 · 9981d70 · 9981d70
1 parent 302a7c7
commit 9981d70
Show file tree

Hide file tree

Showing 5 changed files with 41 additions and 23 deletions.
diff --git a/CHANGES.txt b/CHANGES.txt
@@ -3,9 +3,10 @@
 ## python-markdown2 v1.0.1.18 (not yet released)
 
 - [Issue 42] Add "smarty-pants" extra for transforming plain ASCII
-  punctuation characters into “smart” typographic punctuation HTML entities.
+  punctuation characters into smart typographic punctuation HTML entities.
   Inspiration: <http://daringfireball.net/projects/smartypants/>
-  Implementation by Nikhil Chelliah.
+  Implementation by Nikhil Chelliah. Also add `\'` and `\"` escape sequences
+  for forcing dumb quotes when this extra is in use.
 - Guard against using `True` instead of `None` as follows 
   `markdown(..., extras={'header-ids': True})`. `None` is wanted, but `True`
   is commonly (at least I did it twice) used.

diff --git a/lib/markdown2.py b/lib/markdown2.py
@@ -210,6 +210,11 @@ def __init__(self, html4tags=False, tab_width=4, safe_mode=None,
         self.use_file_vars = use_file_vars
         self._outdent_re = re.compile(r'^(\t|[ ]{1,%d})' % tab_width, re.M)
 
+        self._escape_table = g_escape_table.copy()
+        if "smarty-pants" in self.extras:
+            self._escape_table['"'] = _hash_ascii('"')
+            self._escape_table["'"] = _hash_ascii("'")
+
     def reset(self):
         self.urls = {}
         self.titles = {}
@@ -793,8 +798,8 @@ def _escape_special_chars(self, text):
                 # character with its corresponding MD5 checksum value;
                 # this is likely overkill, but it should prevent us from
                 # colliding with the escape values by accident.
-                escaped.append(token.replace('*', g_escape_table['*'])
-                                    .replace('_', g_escape_table['_']))
+                escaped.append(token.replace('*', self._escape_table['*'])
+                                    .replace('_', self._escape_table['_']))
             else:
                 escaped.append(self._encode_backslash_escapes(token))
             is_html_markup = not is_html_markup
@@ -964,12 +969,12 @@ def _do_links(self, text):
                         url = url[1:-1]  # '<url>' -> 'url'
                     # We've got to encode these to avoid conflicting
                     # with italics/bold.
-                    url = url.replace('*', g_escape_table['*']) \
-                             .replace('_', g_escape_table['_'])
+                    url = url.replace('*', self._escape_table['*']) \
+                             .replace('_', self._escape_table['_'])
                     if title:
                         title_str = ' title="%s"' \
-                            % title.replace('*', g_escape_table['*']) \
-                                   .replace('_', g_escape_table['_']) \
+                            % title.replace('*', self._escape_table['*']) \
+                                   .replace('_', self._escape_table['_']) \
                                    .replace('"', '&quot;')
                     else:
                         title_str = ''
@@ -1008,12 +1013,12 @@ def _do_links(self, text):
                         url = self.urls[link_id]
                         # We've got to encode these to avoid conflicting
                         # with italics/bold.
-                        url = url.replace('*', g_escape_table['*']) \
-                                 .replace('_', g_escape_table['_'])
+                        url = url.replace('*', self._escape_table['*']) \
+                                 .replace('_', self._escape_table['_'])
                         title = self.titles.get(link_id)
                         if title:
-                            title = title.replace('*', g_escape_table['*']) \
-                                         .replace('_', g_escape_table['_'])
+                            title = title.replace('*', self._escape_table['*']) \
+                                         .replace('_', self._escape_table['_'])
                             title_str = ' title="%s"' % title
                         else:
                             title_str = ''
@@ -1404,13 +1409,13 @@ def _encode_code(self, text):
             ('<', '&lt;'),
             ('>', '&gt;'),
             # Now, escape characters that are magic in Markdown:
-            ('*', g_escape_table['*']),
-            ('_', g_escape_table['_']),
-            ('{', g_escape_table['{']),
-            ('}', g_escape_table['}']),
-            ('[', g_escape_table['[']),
-            (']', g_escape_table[']']),
-            ('\\', g_escape_table['\\']),
+            ('*', self._escape_table['*']),
+            ('_', self._escape_table['_']),
+            ('{', self._escape_table['{']),
+            ('}', self._escape_table['}']),
+            ('[', self._escape_table['[']),
+            (']', self._escape_table[']']),
+            ('\\', self._escape_table['\\']),
         ]
         for before, after in replacements:
             text = text.replace(before, after)
@@ -1584,7 +1589,7 @@ def _encode_amps_and_angles(self, text):
         return text
 
     def _encode_backslash_escapes(self, text):
-        for ch, escape in g_escape_table.items():
+        for ch, escape in self._escape_table.items():
             text = text.replace("\\"+ch, escape)
         return text
 
@@ -1653,8 +1658,8 @@ def _do_link_patterns(self, text):
                 escaped_href = (
                     href.replace('"', '&quot;')  # b/c of attr quote
                         # To avoid markdown <em> and <strong>:
-                        .replace('*', g_escape_table['*'])
-                        .replace('_', g_escape_table['_']))
+                        .replace('*', self._escape_table['*'])
+                        .replace('_', self._escape_table['_']))
                 link = '<a href="%s">%s</a>' % (escaped_href, text[start:end])
                 hash = _hash_text(link)
                 link_from_hash[hash] = link
@@ -1665,7 +1670,7 @@ def _do_link_patterns(self, text):
 
     def _unescape_special_chars(self, text):
         # Swap back in all the special characters we've hidden.
-        for ch, hash in g_escape_table.items():
+        for ch, hash in self._escape_table.items():
             text = text.replace(hash, ch)
         return text
 

diff --git a/test/tm-cases/smarty_pants.html b/test/tm-cases/smarty_pants.html
@@ -4,11 +4,16 @@ <h2>&ldquo;Simple substitutions&rdquo;</h2>
 
 <p>And finally&#8230;wait for it, and again with spaces&#8230;I&rsquo;ve tested ellipses.</p>
 
+<h2>Escapes</h2>
+
 <p>Before getting to the hard stuff, I&rsquo;ll run through all the escape sequences &mdash; they shouldn&rsquo;t need to become HTML entities.</p>
 
 <pre><code>\\ \" \' \` \- \. \&gt;
 </code></pre>
 
+<p>The &ldquo;smarty-pants&rdquo; extra adds escapes for 'single quotes' and "double
+quotes" in case you want to force dumb quotes.</p>
+
 <h2>&ldquo;Quotation marks&rdquo;</h2>
 
 <p>You&rsquo;ll notice that I began this document with a quotation to test a potential error: $ is zero-width and \s is one-width, and you can&rsquo;t have both in a backreference.  Meanwhile, I&rsquo;ve this paragraph has tested contractions four times; &rsquo;tis close, but this last apostrophe should fool the regex.</p>

diff --git a/test/tm-cases/smarty_pants.tags b/test/tm-cases/smarty_pants.tags
@@ -0,0 +1 @@
+smarty-pants issue42
diff --git a/test/tm-cases/smarty_pants.text b/test/tm-cases/smarty_pants.text
@@ -5,10 +5,16 @@ Here I interrupt myself with an en dash -- no, now it's with---an em dash.
 
 And finally...wait for it, and again with spaces. . .I've tested ellipses.
 
+Escapes
+-------
+
 Before getting to the hard stuff, I'll run through all the escape sequences --- they shouldn't need to become HTML entities.
 
     \\ \" \' \` \- \. \>
 
+The "smarty-pants" extra adds escapes for \'single quotes\' and \"double
+quotes\" in case you want to force dumb quotes.
+
 "Quotation marks"
 -----------------