Some old unfinished work towards Mako template language support that I
don't want to lose. The patch here is against approx. markdown2.py@116
(i.e. a fairly old rev.)

trentm committed Sep 6, 2008
1 parent 2f4b954 commit 3eb1095
Showing 3 changed files with 394 additions and 0 deletions.
6 changes: 6 additions & 0 deletions sandbox/mako/README.txt
@@ -0,0 +1,6 @@
Mako is a templating language. I have some unfinished work here
to attempt to get markdown2.py to treat Mako syntax just like raw
HTML. I.e. allow writing mixed Markdown-Mako text.

The key here is *unfinished*. I'm not sure if it is reasonable or feasible
to support this with the current implementation.
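
For orientation: the patch below (markdown2.patch) gates the new behaviour on a
"mako" entry in markdown2's extras set, so the intended entry point would look
roughly like this sketch. It is only an illustration of the intent; the extra is
unfinished and the call is not expected to produce fully correct output yet.

    >>> import markdown2
    >>> text = open("sandbox/mako/mako_templates.text").read()
    >>> html = markdown2.markdown(text, extras=["mako"])  # "mako": the unfinished extra added by the patch
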
21 changes: 21 additions & 0 deletions sandbox/mako/mako_templates.text
@@ -0,0 +1,21 @@
<!-- From front page of makotemplates.org with some Markdown additions. -->

Here is *some* [Markdown](http://google.com/?q=Markdown).

<%inherit file="base.html"/>
<%
rows = [[v for v in range(0,10)] for row in range(0,10)]
%>
<table>
% for row in rows:
${makerow(row)}
% endfor
</table>

<%def name="makerow(row)">
<tr>
% for name in row:
<td>${name}</td>\
% endfor
</tr>
</%def>
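
The patch that follows treats each Mako construct in a file like the one above
(the <% ... %> block, the <%def> and <%inherit> tags, the % for / % endfor
lines, and the ${...} substitutions) much the way markdown2 already treats raw
HTML blocks: each one is hashed out of the text before the Markdown passes run
and restored afterwards. A minimal standalone sketch of that hash-and-restore
idea, reusing the expression-substitution pattern and placeholder format from
the patch (hashlib stands in here for the old md5 module; the helper names are
illustrative, not part of markdown2):

    import hashlib
    import re

    expr_re = re.compile(r'\${.*?}')   # expression-substitution pattern from the patch
    table = {}                         # placeholder -> original Mako text

    def protect(match):
        mako = match.group(0)
        key = '!{hash}' + hashlib.md5(mako.encode('utf-8')).hexdigest() + '!'
        table[key] = mako
        return key

    src = 'A cell: <td>${name}</td>'
    hashed = expr_re.sub(protect, src)      # the Markdown passes would run on `hashed`
    for key, mako in table.items():
        hashed = hashed.replace(key, mako)  # unhash once Markdown processing is done
    assert hashed == src
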
367 changes: 367 additions & 0 deletions sandbox/mako/markdown2.patch
@@ -0,0 +1,367 @@
--- markdown2.py.116 2008-09-06 10:05:56.000000000 -0700
+++ markdown2.py.makowork 2008-02-03 12:30:15.000000000 -0800
@@ -1,39 +1,38 @@
#!/usr/bin/env python
# Copyright (c) 2007 ActiveState Corp.

-"""A fast and complete Python implementation of Markdown.
+r"""A fast and complete Python implementation of Markdown.

[from http://daringfireball.net/projects/markdown/]
> Markdown is a text-to-HTML filter; it translates an easy-to-read /
> easy-to-write structured text format into HTML. Markdown's text
> format is most similar to that of plain text email, and supports
> features such as headers, *emphasis*, code blocks, blockquotes, and
> links.
>
> Markdown's syntax is designed not as a generic markup language, but
> specifically to serve as a front-end to (X)HTML. You can use span-level
> HTML tags anywhere in a Markdown document, and you can use block level
> HTML tags (like <div> and <table> as well).

Module usage:

>>> import markdown2
- >>> html = markdown2.markdown_path(path, ...)
- >>> markdown2.markdown("*boo!*", ...)
- <em>boo!</em>
+ >>> markdown2.markdown("*boo!*") # also markdown2.markdown_path(<path>)
+ u'<p><em>boo!</em></p>\n'

- >>> markdowner = Markdown(...)
+ >>> markdowner = Markdown()
>>> markdowner.convert("*boo!*")
- <em>boo!</em>
+ u'<p><em>boo!</em></p>\n'
>>> markdowner.convert("**boom!**")
- <strong>boom!</strong>
+ u'<p><strong>boom!</strong></p>\n'

This implementation of Markdown implements the full "core" syntax plus a
number of extras (e.g., code syntax coloring, footnotes) as described on
<http://code.google.com/p/python-markdown2/wiki/Extras>.
"""

cmdln_desc = """A fast and complete Python implementation of Markdown, a
text-to-HTML conversion tool for web writers.
"""

@@ -118,21 +117,21 @@
safe_mode=safe_mode, extras=extras,
link_patterns=link_patterns).convert(text)

class Markdown(object):
# The set of "extras" to enable in processing. This can be set
# via (a) subclassing and (b) the constructor "extras" argument.
extras = None

urls = None
titles = None
- html_blocks = None
+ html_blocks = None # a HashTable instance
html_spans = None
html_removed_text = "[HTML_REMOVED]" # for compat with markdown.py

# Used to track when we're inside an ordered or unordered list
# (see _ProcessListItems() for details):
list_level = 0

_ws_only_line_re = re.compile(r"^[ \t]+$", re.M)

def __init__(self, html4tags=False, tab_width=4, safe_mode=None,
@@ -157,21 +156,21 @@
self.extras = set(self.extras)
if extras:
self.extras.update(extras)
self._instance_extra = self.extras.copy()
self.link_patterns = link_patterns
self._outdent_re = re.compile(r'^(\t|[ ]{1,%d})' % tab_width, re.M)

def reset(self):
self.urls = {}
self.titles = {}
- self.html_blocks = {}
+ self.html_blocks = HashTable()
self.html_spans = {}
self.list_level = 0
self.extras = self._instance_extra.copy()
if "footnotes" in self.extras:
self.footnotes = {}
self.footnote_ids = []

def convert(self, text):
"""Convert the given text."""
# Main function. The order in which other subs are called here is
@@ -206,20 +205,23 @@

# Strip any lines consisting only of spaces and tabs.
# This makes subsequent regexen easier to write, because we can
# match consecutive blank lines with /\n+/ instead of something
# contorted like /[ \t]*\n+/ .
text = self._ws_only_line_re.sub("", text)

if self.safe_mode:
text = self._hash_html_spans(text)

+ if "mako" in self.extras:
+ text = self._hash_mako_blocks(text)
+
# Turn block-level HTML blocks into hash entries
text = self._hash_html_blocks(text, raw=True)

# Strip link definitions, store in hashes.
if "footnotes" in self.extras:
# Must do footnotes first because an unlucky footnote defn
# looks like a link defn:
# [^4]: this "looks like a link defn"
text = self._strip_footnote_definitions(text)
text = self._strip_link_definitions(text)
@@ -404,23 +406,22 @@
[ \t]* # trailing spaces/tabs
(?=\n+|\Z) # followed by a newline or end of document
)
""" % _block_tags_b,
re.X | re.M)

def _hash_html_block_sub(self, match, raw=False):
html = match.group(1)
if raw and self.safe_mode:
html = self._sanitize_html(html)
- key = _hash_text(html)
- self.html_blocks[key] = html
- return "\n\n" + key + "\n\n"
+ hash = self.html_blocks.add(html)
+ return "\n\n" + hash + "\n\n"

def _hash_html_blocks(self, text, raw=False):
"""Hashify HTML blocks

We only want to do this for block-level HTML tags, such as headers,
lists, and tables. That's because we still want to wrap <p>s around
"paragraphs" that are wrapped in non-block-level tags, such as anchors,
phrase emphasis, and spans. The list of tags we're looking for is
hard-coded.

@@ -455,20 +456,76 @@
_hr_tag_re = _hr_tag_re_from_tab_width(self.tab_width)
text = _hr_tag_re.sub(hash_html_block_sub, text)

# Special case for standalone HTML comments:
if "<!--" in text:
_html_comment_re = _html_comment_re_from_tab_width(self.tab_width)
text = _html_comment_re.sub(hash_html_block_sub, text)

return text

+ _mako_regexes = [
+ # http://www.makotemplates.org/docs/syntax.html
+ # Ordering of these regexes is important.
+
+ # Python Blocks
+ re.compile(r'''
+ <%!?\B.*?%>
+ [ \t]* # trailing spaces/tabs
+ (?=\n+|\Z) # followed by a newline or end of document
+ ''', re.M | re.S | re.X),
+
+ # Tags
+ # - Block tags
+ re.compile(r'''
+ <%(def|call|doc|text)\b.*?>
+ .*?
+ </%\1>
+ [ \t]* # trailing spaces/tabs
+ (?=\n+|\Z) # followed by a newline or end of document
+ ''', re.M | re.S | re.X),
+ # - Single tag
+ re.compile(r'''
+ <%(page|include|namespace|inherit)\b.*?/>
+ [ \t]* # trailing spaces/tabs
+ (?=\n+|\Z) # followed by a newline or end of document
+ ''', re.M | re.S | re.X),
+
+ # Control Structures
+ # Note: don't support "Newline Filters".
+ re.compile(r'''
+ ^[ \t]*%[ \t]*(for|if)
+ .*?
+ ^[ \t]*%[ \t]*end\1
+ [ \t]* # trailing spaces/tabs
+ (?=\n+|\Z) # followed by a newline or end of document
+ ''', re.M | re.S | re.X),
+
+ # Comments
+ # Note: don't support "Newline Filters".
+ re.compile(r'^[ \t]*##.*?$', re.M),
+
+ # Expression Substitution
+ re.compile(r'\${.*?}'),
+ ]
+
+ def _hash_mako_block_sub(self, match):
+ mako_block = match.group(0)
+        key = _hash_text(mako_block)  # unused leftover; HashTable.add() below recomputes this hash
+ hash = self.html_blocks.add(mako_block)
+ return "\n\n" + hash + "\n\n"
+
+ def _hash_mako_blocks(self, text):
+ for regex in self._mako_regexes:
+ text = regex.sub(self._hash_mako_block_sub, text)
+ return text
+
def _strip_link_definitions(self, text):
# Strips link definitions from text, stores the URLs and titles in
# hash references.
less_than_tab = self.tab_width - 1

# Link defs are in the form: ^[id]: url "optional title"
_link_def_re = re.compile(r"""
^[ ]{0,%d}\[(.+)\]: # id = \1
[ \t]*
\n? # maybe *one* newline
@@ -597,21 +654,21 @@

# Do hard breaks:
text = re.sub(r" {2,}\n", " <br%s\n" % self.empty_element_suffix, text)

return text

# "Sorta" because auto-links are identified as "tag" tokens.
_sorta_html_tokenize_re = re.compile(r"""
(
# tag
- </?
+ </? #TODO: append '%?' for Mako, how best to do this?
(?:\w+) # tag name
(?:\s+(?:[\w-]+:)?[\w-]+=(?:".*?"|'.*?'))* # attributes
\s*/?>
|
# auto-link (e.g., <http://www.activestate.com/>)
<\w+[^>]*>
|
<!--.*?--> # comment
|
<\?.*?\?> # processing instruction
@@ -1227,21 +1284,21 @@

def _form_paragraphs(self, text):
# Strip leading and trailing lines:
text = text.strip('\n')

# Wrap <p> tags.
grafs = re.split(r"\n{2,}", text)
for i, graf in enumerate(grafs):
if graf in self.html_blocks:
# Unhashify HTML blocks
- grafs[i] = self.html_blocks[graf]
+ grafs[i] = self.html_blocks.unhash(graf)
else:
# Wrap <p> tags.
graf = self._run_span_gamut(graf)
grafs[i] = "<p>" + graf.lstrip(" \t") + "</p>"

return "\n\n".join(grafs)

def _add_footnotes(self, text):
if self.footnotes:
footer = [
@@ -1376,20 +1433,56 @@

- code-friendly: because it *disables* part of the syntax
- link-patterns: because you need to specify some actual
link-patterns anyway
"""
extras = ["footnotes", "code-color"]


#---- internal support functions

+class HashTable(dict):
+ """A table for mapping hashed versions of text. Basically
+ it is a {<hash>: <text>} dictionary with the .add() and .unhash()
+ convenience methods.
+
+ >>> tbl = HashTable()
+ >>> hash = tbl.add("foo")
+ >>> hash
+ '!{hash}acbd18db4cc2f85cedef654fccc4a4d8!'
+ >>> hash in tbl
+ True
+ >>> tbl[hash]
+ 'foo'
+ >>> tbl.unhash("bar %s bar" % hash)
+ 'bar foo bar'
+ """
+ def add(self, text):
+ hash = _hash_text(text)
+ self[hash] = text
+ return hash
+
+ _hash_re = re.compile("!{hash}[0-9a-z]{32}!")
+ def _unhash_sub(self, match):
+ hash = match.group(0)
+ if hash in self:
+ return self.unhash(self[hash])
+ else:
+ return hash
+
+ def unhash(self, text):
+ if "!{hash}" not in text:
+ return text
+ return self._hash_re.sub(self._unhash_sub, text)
+
+
+
# From http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52549
def _curry(*args, **kwargs):
function, args = args[0], args[1:]
def result(*rest, **kwrest):
combined = kwargs.copy()
combined.update(kwrest)
return function(*args + rest, **combined)
return result

# Recipe: regex_from_encoded_pattern (1.0)
@@ -1579,21 +1672,21 @@
# '@' *must* be encoded. I [John Gruber] insist.
if r > 0.9 and ch != "@":
return ch
elif r < 0.45:
# The [1:] is to drop leading '0': 0x63 -> x63
return '&#%s;' % hex(ord(ch))[1:]
else:
return '&#%s;' % ord(ch)

def _hash_text(text):
- return '!'+md5.md5(text.encode("utf-8")).hexdigest()+'!'
+ return '!{hash}'+md5.md5(text.encode("utf-8")).hexdigest()+'!'


#---- mainline

class _NoReflowFormatter(optparse.IndentedHelpFormatter):
"""An optparse formatter that does NOT reflow the description."""
def format_description(self, description):
return description or ""

def _test():
@@ -1668,18 +1761,17 @@
markdown_pl = join(dirname(__file__), "test", "Markdown.pl")
for path in paths:
if opts.compare:
print "-- Markdown.pl"
os.system('perl %s "%s"' % (markdown_pl, path))
print "-- markdown2.py"
html = markdown_path(path, encoding=opts.encoding,
html4tags=opts.html4tags,
safe_mode=opts.safe_mode,
extras=extras, link_patterns=link_patterns)
- sys.stdout.write(
- html.encode(sys.stdout.encoding, 'xmlcharrefreplace'))
+ sys.stdout.write(html.encode(sys.stdout.encoding, "xmlcharrefreplace"))


if __name__ == "__main__":
logging.basicConfig()
sys.exit( main(sys.argv) )
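
As a standalone check of the trickiest of the new patterns, the
control-structure regex from the patch's _mako_regexes list can be exercised by
itself against the % for block from mako_templates.text above. The pattern is
copied from the hunk; the surrounding scaffolding is only an illustration:

    import re

    # Control-structure pattern as it appears in _mako_regexes in the patch.
    ctrl_re = re.compile(r'''
        ^[ \t]*%[ \t]*(for|if)
        .*?
        ^[ \t]*%[ \t]*end\1
        [ \t]*          # trailing spaces/tabs
        (?=\n+|\Z)      # followed by a newline or end of document
        ''', re.M | re.S | re.X)

    sample = (
        "<table>\n"
        "    % for row in rows:\n"
        "        ${makerow(row)}\n"
        "    % endfor\n"
        "</table>\n"
    )
    match = ctrl_re.search(sample)
    print(match.group(0))   # the whole "% for ... % endfor" block, ready to be hashed out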
