first stab

0 parents commit 885aecb444662005977efb4c5b950768ed38c8d4 @trentm committed May 16, 2012
Showing with 12,314 additions and 0 deletions.
  1. +2 −0 .gitignore
  2. +3 −0 DEVNOTES.txt
  3. +36 −0 README.md
  4. +315 −0 bin/nodedoc.py
  5. +2 −0 deps/README.md
  6. +346 −0 deps/appdirs.py
  7. BIN deps/appdirs.pyc
  8. +2,299 −0 deps/markdown2.py
  9. BIN deps/markdown2.pyc
  10. +37 −0 doc/api/_toc.markdown
  11. +624 −0 doc/api/addons.markdown
  12. +36 −0 doc/api/all.markdown
  13. +44 −0 doc/api/appendix_1.markdown
  14. +84 −0 doc/api/assert.markdown
  15. +645 −0 doc/api/buffer.markdown
  16. +407 −0 doc/api/child_process.markdown
  17. +439 −0 doc/api/cluster.markdown
  18. +363 −0 doc/api/crypto.markdown
  19. +138 −0 doc/api/debugger.markdown
  20. +209 −0 doc/api/dgram.markdown
  21. +148 −0 doc/api/dns.markdown
  22. +68 −0 doc/api/documentation.markdown
  23. +261 −0 doc/api/domain.markdown
  24. +95 −0 doc/api/events.markdown
  25. +667 −0 doc/api/fs.markdown
  26. +150 −0 doc/api/globals.markdown
  27. +886 −0 doc/api/http.markdown
  28. +173 −0 doc/api/https.markdown
  29. +1 −0 doc/api/index.markdown
  30. +461 −0 doc/api/modules.markdown
  31. +467 −0 doc/api/net.markdown
  32. +134 −0 doc/api/os.markdown
  33. +156 −0 doc/api/path.markdown
  34. +410 −0 doc/api/process.markdown
  35. +49 −0 doc/api/querystring.markdown
  36. +283 −0 doc/api/readline.markdown
  37. +188 −0 doc/api/repl.markdown
  38. +61 −0 doc/api/stdio.markdown
  39. +184 −0 doc/api/stream.markdown
  40. +24 −0 doc/api/string_decoder.markdown
  41. +23 −0 doc/api/synopsis.markdown
  42. +31 −0 doc/api/timers.markdown
  43. +491 −0 doc/api/tls.markdown
  44. +75 −0 doc/api/tty.markdown
  45. +99 −0 doc/api/url.markdown
  46. +190 −0 doc/api/util.markdown
  47. +220 −0 doc/api/vm.markdown
  48. +276 −0 doc/api/zlib.markdown
  49. +14 −0 package.json
2 .gitignore
@@ -0,0 +1,2 @@
+/tmp
+/node_modules
3 DEVNOTES.txt
@@ -0,0 +1,3 @@
+# dev notes
+
+ less '+10G' configure # how to call `less` to jump to particular line
36 README.md
@@ -0,0 +1,36 @@
+A fledgling `perldoc` for node.js.
+
+# Installation
+
+1. Get [node](http://nodejs.org).
+
+2. `npm install -g nodedoc`
+
+You should now have "nodedoc" on your PATH:
+
+ $ nodedoc --version
+ nodedoc 1.0.0
+
+# Status
+
+This really is a quick hack. There are a number of limitations in the current
+Markdown -> HTML -> ANSI escape-colored text conversion. Among them:
+
+- nested lists aren't handled properly
+- ordered lists (`<ol>`) aren't handled properly
+
+The node.js docs included here are a snapshot of the
+<https://github.com/joyent/node> master.
+
+
+# Examples
+
+This will render and color the fs.markdown core docs and page through them
+(using your `PAGER` environment setting, if any):
+
+ $ nodedoc fs
+
+List all nodedoc sections:
+
+ $ nodedoc -l
+
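+Depending on your pager's settings, you may need to tell it to pass ANSI
+escapes through; a sketch, assuming a `less` that supports `-R`:
+
+    $ PAGER="less -R" nodedoc fs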
315 bin/nodedoc.py
@@ -0,0 +1,315 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""nodedoc -- fledgling perldoc for node.js
+
+Usage:
+ nodedoc SECTION
+
+See <https://github.com/trentm/nodedoc> for more info.
+"""
+
+__version_info__ = (1, 0, 0)
+__version__ = '.'.join(map(str, __version_info__))
+
+import re
+import sys
+import textwrap
+import os
+from os.path import dirname, abspath, join, exists
+import logging
+import codecs
+import optparse
+from glob import glob
+
+TOP = dirname(dirname(abspath(__file__)))
+sys.path.insert(0, join(TOP, "deps"))
+import markdown2
+import appdirs
+
+
+
+#---- globals
+
+log = logging.getLogger("nodedoc")
+CACHE_DIR = appdirs.user_cache_dir("nodedoc", "trentm")
+
+
+
+#---- exceptions
+
+class Error(Exception):
+ pass
+
+
+
+#---- stylers
+
+def red(text):
+ return '\033[31m' + text + '\033[39m'
+def green(text):
+ return '\033[32m' + text + '\033[39m'
+def cyan(text):
+ return '\033[36m' + text + '\033[39m'
+def grey(text):
+ return '\033[90m' + text + '\033[39m'
+
+def bold(text):
+ return '\033[1m' + text + '\033[22m'
+def italic(text):
+ return '\033[3m' + text + '\033[23m'
+def inverse(text):
+ return '\033[7m' + text + '\033[27m'
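+
+# A quick sketch of how these stylers compose: nesting wraps the inner
+# escape sequence in the outer one, e.g.
+#   bold(red('hi')) == '\033[1m' + '\033[31m' + 'hi' + '\033[39m' + '\033[22m'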
+
+
+
+#---- re.sub transformers
+
+def indent(match):
+ INDENT = ' '
+ text = match.group(1)
+ after = INDENT + INDENT.join(text.splitlines(True))
+ #print "XXX hit: after=%r" % after
+ return after
+
+def code(match):
+ """green. Special case grey for "Stability: ..." pre-blocks."""
+ text = match.group(1)
+ styler = green
+ if text.startswith("Stability:"):
+ styler = grey
+ lines = [
+ styler(line)
+ for line in text.splitlines(False)
+ ]
+ return '\n'.join(lines)
+
+def wrap(match, width=80):
+ """XXX TODO: somehow make the ANSI escapes zero-length for width
+ calculation."""
+ text = match.group(1)
+ text = '\n'.join(textwrap.wrap(text, width=width))
+ return text
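+
+# A possible approach to the TODO above (an untested sketch): measure and
+# wrap on a copy of the text with the escapes stripped out, e.g. via
+# re.sub(r'\x1b\[[0-9;]*m', '', text), so each escape counts as zero width.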
+
+def h1(match):
+ """bold red"""
+ text = match.group(1)
+ return bold(red('# ' + text))
+
+def h2(match):
+ """bold red, extra leading space"""
+ text = match.group(1)
+ text = '\n' + bold(red('## ' + text))
+ return text
+
+def h3(match):
+ """bold red"""
+ text = match.group(1)
+ text = '\n' + bold(red('### ' + text))
+ return text
+
+def a(match):
+ """blue"""
+ text = match.group(1)
+ lines = [
+ '\033[34m' + line + '\033[39m'
+ for line in text.splitlines(False)
+ ]
+ return '\n'.join(lines)
+
+def em(match):
+ """cyan"""
+ text = match.group(1)
+ lines = [cyan(line) for line in text.splitlines(False)]
+ return cyan('*') + '\n'.join(lines) + cyan('*')
+
+def strong(match):
+ """bold cyan"""
+ text = match.group(1)
+ lines = [bold(cyan(line)) for line in text.splitlines(False)]
+ return bold(cyan('**')) + '\n'.join(lines) + bold(cyan('**'))
+
+def li(match):
+ """bullet and indent and reflow"""
+ text = match.group(1)
+ text = '\n'.join(textwrap.wrap(text, width=78))
+ INDENT = ' '
+ text = INDENT + INDENT.join(text.splitlines(True))
+ text = '-' + text[1:]
+ return text
+
+def noop(match):
+ return match.group(1)
+
+
+
+#---- main nodedoc functionality
+
+def generate_html_path(markdown_path, html_path):
+ if not exists(dirname(html_path)):
+ os.makedirs(dirname(html_path))
+ html = markdown2.markdown_path(markdown_path)
+ codecs.open(html_path, 'w', 'utf-8').write(html)
+
+def generate_nodedoc_path(html_path, nodedoc_path):
+ if not exists(dirname(nodedoc_path)):
+ os.makedirs(dirname(nodedoc_path))
+
+ content = codecs.open(html_path, 'r', 'utf-8').read()
+
+ # html comments: drop
+ content = re.compile('\n?<!--(.*?)-->\n', re.S).sub('', content)
+
+ # code:
+ content = re.compile('<code>(.*?)</code>', re.S).sub(code, content)
+
+ # pre: indent
+ content = re.compile('<pre>(.*?)</pre>', re.S).sub(indent, content)
+
+ # li: bullet
+ # XXX how to know if we're inside an <ol>? AFAICT only one <ol> in node.js docs, so ignoring for now.
+ # XXX does this mess up multi-para li?
+ content = re.compile('<li>(?:<p>)?(.*?)(?:</p>)?</li>', re.S).sub(li, content)
+ # ol, ul: ignore
+ content = re.compile('\n?<ul>(.*?)</ul>\n', re.S).sub(noop, content)
+ content = re.compile('\n?<ol>(.*?)</ol>\n', re.S).sub(noop, content)
+
+ # p: wrap content at 80 columns
+ content = re.compile('<p>(.*?)</p>', re.S).sub(wrap, content)
+
+ # a: drop attrs (until/unless have a way to follow those links)
+ content = re.compile('<a[^>]*>(.*?)</a>', re.S).sub(a, content)
+
+ content = re.compile('<em>(.*?)</em>', re.S).sub(em, content)
+ content = re.compile('<strong>(.*?)</strong>', re.S).sub(strong, content)
+
+ # hN: highlight, but how to highlight different levels?
+ content = re.compile('<h1>(.*?)</h1>', re.S).sub(h1, content)
+ content = re.compile('<h2>(.*?)</h2>', re.S).sub(h2, content)
+ content = re.compile('<h3>(.*?)</h3>', re.S).sub(h3, content)
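+ # For example (sketch): '<h2>fs.readFile</h2>' becomes
+ # '\n' + '\033[1m\033[31m## fs.readFile\033[39m\033[22m' (bold red).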
+
+ #TODO:XXX special case two adjacent h2's, e.g.:
+ #
+ # <h2>fs.utimes(path, atime, mtime, [callback])</h2>
+ #
+ # <h2>fs.utimesSync(path, atime, mtime)</h2>
+ #
+ # <p>Change file timestamps of the file referenced by the supplied path.</p>
+
+ codecs.open(nodedoc_path, 'w', 'utf-8').write(content)
+
+
+def nodedoc(section):
+ markdown_path = join(TOP, "doc", "api", section + ".markdown")
+ if not exists(markdown_path):
+ raise Error("no such section: '%s'" % section)
+
+ html_path = join(CACHE_DIR, section + ".html")
+ if not exists(html_path) or mtime(html_path) < mtime(markdown_path):
+ generate_html_path(markdown_path, html_path)
+
+ nodedoc_path = join(CACHE_DIR, section + ".nodedoc")
+ if not exists(nodedoc_path) or mtime(nodedoc_path) < mtime(html_path):
+ generate_nodedoc_path(html_path, nodedoc_path)
+
+ pager = os.environ.get("PAGER", "less")
+ cmd = '%s "%s"' % (pager, nodedoc_path)
+ return os.system(cmd)
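+
+# Note the two-stage cache above, regenerated whenever mtimes go stale:
+#   doc/api/<section>.markdown -> CACHE_DIR/<section>.html
+#   -> CACHE_DIR/<section>.nodedoc (the ANSI-styled text handed to the pager)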
+
+def nodedoc_sections():
+ markdown_paths = glob(join(TOP, "doc", "api", "*.markdown"))
+ for p in markdown_paths:
+ yield os.path.splitext(os.path.basename(p))[0]
+
+
+
+#---- other internal support stuff
+
+class _LowerLevelNameFormatter(logging.Formatter):
+ def format(self, record):
+ record.lowerlevelname = record.levelname.lower()
+ return logging.Formatter.format(self, record)
+
+def _setup_logging():
+ hdlr = logging.StreamHandler(sys.stdout)
+ fmt = "%(name)s: %(lowerlevelname)s: %(message)s"
+ fmtr = _LowerLevelNameFormatter(fmt=fmt)
+ hdlr.setFormatter(fmtr)
+ logging.root.addHandler(hdlr)
+
+class _NoReflowFormatter(optparse.IndentedHelpFormatter):
+ """An optparse formatter that does NOT reflow the description."""
+ def format_description(self, description):
+ return description or ""
+
+def mtime(path):
+ return os.stat(path).st_mtime
+
+
+
+#---- mainline
+
+def main(argv=sys.argv):
+ _setup_logging()
+ log.setLevel(logging.INFO)
+
+ # Parse options.
+ parser = optparse.OptionParser(prog="nodedoc", usage='',
+ version="%prog " + __version__, description=__doc__,
+ formatter=_NoReflowFormatter())
+ parser.add_option("-v", "--verbose", dest="log_level",
+ action="store_const", const=logging.DEBUG,
+ help="more verbose output")
+ parser.add_option("-q", "--quiet", dest="log_level",
+ action="store_const", const=logging.WARNING,
+ help="quieter output (just warnings and errors)")
+ parser.add_option("-l", "--list", action="store_true",
+ help="list all nodedoc sections")
+ parser.set_defaults(log_level=logging.INFO)
+ opts, sections = parser.parse_args()
+ log.setLevel(opts.log_level)
+
+ if opts.list:
+ print '\n'.join(nodedoc_sections())
+ elif len(sections) == 0:
+ parser.print_help()
+ elif len(sections) > 1:
+ log.error("too many arguments: %s", ' '.join(sections))
+ else:
+ return nodedoc(sections[0])
+
+
+## {{{ http://code.activestate.com/recipes/577258/ (r4)
+if __name__ == "__main__":
+ try:
+ retval = main(sys.argv)
+ except KeyboardInterrupt:
+ sys.exit(1)
+ except SystemExit:
+ raise
+ except:
+ import traceback, logging
+ if not log.handlers and not logging.root.handlers:
+ logging.basicConfig()
+ skip_it = False
+ exc_info = sys.exc_info()
+ if hasattr(exc_info[0], "__name__"):
+ exc_class, exc, tb = exc_info
+ if isinstance(exc, IOError) and exc.args[0] == 32:
+ # Skip 'IOError: [Errno 32] Broken pipe': often a cancelling of `less`.
+ skip_it = True
+ if not skip_it:
+ tb_path, tb_lineno, tb_func = traceback.extract_tb(tb)[-1][:3]
+ log.error("%s (%s:%s in %s)", exc_info[1], tb_path,
+ tb_lineno, tb_func)
+ else: # string exception
+ log.error(exc_info[0])
+ if not skip_it:
+ if log.isEnabledFor(logging.DEBUG):
+ print
+ traceback.print_exception(*exc_info)
+ sys.exit(1)
+ else:
+ sys.exit(retval)
+## end of http://code.activestate.com/recipes/577258/ }}}
2 deps/README.md
@@ -0,0 +1,2 @@
+markdown2.py # from https://github.com/trentm/python-markdown2
+appdirs.py # from https://github.com/ActiveState/appdirs
346 deps/appdirs.py
@@ -0,0 +1,346 @@
+#!/usr/bin/env python
+# Copyright (c) 2005-2010 ActiveState Software Inc.
+
+"""Utilities for determining application-specific dirs.
+
+See <http://github.com/ActiveState/appdirs> for details and usage.
+"""
+# Dev Notes:
+# - MSDN on where to store app data files:
+# http://support.microsoft.com/default.aspx?scid=kb;en-us;310294#XSLTH3194121123120121120120
+# - Mac OS X: http://developer.apple.com/documentation/MacOSX/Conceptual/BPFileSystem/index.html
+# - XDG spec for Un*x: http://standards.freedesktop.org/basedir-spec/basedir-spec-latest.html
+
+__version_info__ = (1, 2, 0)
+__version__ = '.'.join(map(str, __version_info__))
+
+
+import sys
+import os
+
+PY3 = sys.version_info[0] == 3
+
+if PY3:
+ unicode = str
+
+class AppDirsError(Exception):
+ pass
+
+
+
+def user_data_dir(appname, appauthor=None, version=None, roaming=False):
+ r"""Return full path to the user-specific data dir for this application.
+
+ "appname" is the name of application.
+ "appauthor" (only required and used on Windows) is the name of the
+ appauthor or distributing body for this application. Typically
+ it is the owning company name.
+ "version" is an optional version path element to append to the
+ path. You might want to use this if you want multiple versions
+ of your app to be able to run independently. If used, this
+ would typically be "<major>.<minor>".
+ "roaming" (boolean, default False) can be set True to use the Windows
+ roaming appdata directory. That means that for users on a Windows
+ network setup for roaming profiles, this user data will be
+ sync'd on login. See
+ <http://technet.microsoft.com/en-us/library/cc766489(WS.10).aspx>
+ for a discussion of issues.
+
+ Typical user data directories are:
+ Mac OS X: ~/Library/Application Support/<AppName>
+ Unix: ~/.config/<appname> # or in $XDG_CONFIG_HOME if defined
+ Win XP (not roaming): C:\Documents and Settings\<username>\Application Data\<AppAuthor>\<AppName>
+ Win XP (roaming): C:\Documents and Settings\<username>\Local Settings\Application Data\<AppAuthor>\<AppName>
+ Win 7 (not roaming): C:\Users\<username>\AppData\Local\<AppAuthor>\<AppName>
+ Win 7 (roaming): C:\Users\<username>\AppData\Roaming\<AppAuthor>\<AppName>
+
+ For Unix, we follow the XDG spec and support $XDG_CONFIG_HOME. We don't
+ use $XDG_DATA_HOME as that data dir is mostly used at the time of
+ installation, instead of the application adding data during runtime.
+ Also, in practice, Linux apps tend to store their data in
+ "~/.config/<appname>" instead of "~/.local/share/<appname>".
+ """
+ if sys.platform.startswith("win"):
+ if appauthor is None:
+ raise AppDirsError("must specify 'appauthor' on Windows")
+ const = roaming and "CSIDL_APPDATA" or "CSIDL_LOCAL_APPDATA"
+ path = os.path.join(_get_win_folder(const), appauthor, appname)
+ elif sys.platform == 'darwin':
+ path = os.path.join(
+ os.path.expanduser('~/Library/Application Support/'),
+ appname)
+ else:
+ path = os.path.join(
+ os.getenv('XDG_CONFIG_HOME', os.path.expanduser("~/.config")),
+ appname.lower())
+ if version:
+ path = os.path.join(path, version)
+ return path
+
+
+def site_data_dir(appname, appauthor=None, version=None):
+ """Return full path to the user-shared data dir for this application.
+
+ "appname" is the name of application.
+ "appauthor" (only required and used on Windows) is the name of the
+ appauthor or distributing body for this application. Typically
+ it is the owning company name.
+ "version" is an optional version path element to append to the
+ path. You might want to use this if you want multiple versions
+ of your app to be able to run independently. If used, this
+ would typically be "<major>.<minor>".
+
+ Typical user data directories are:
+ Mac OS X: /Library/Application Support/<AppName>
+ Unix: /etc/xdg/<appname>
+ Win XP: C:\Documents and Settings\All Users\Application Data\<AppAuthor>\<AppName>
+ Vista: (Fail! "C:\ProgramData" is a hidden *system* directory on Vista.)
+ Win 7: C:\ProgramData\<AppAuthor>\<AppName> # Hidden, but writeable on Win 7.
+
+ For Unix, this uses the $XDG_CONFIG_DIRS[0] default.
+
+ WARNING: Do not use this on Windows. See the Vista-Fail note above for why.
+ """
+ if sys.platform.startswith("win"):
+ if appauthor is None:
+ raise AppDirsError("must specify 'appauthor' on Windows")
+ path = os.path.join(_get_win_folder("CSIDL_COMMON_APPDATA"),
+ appauthor, appname)
+ elif sys.platform == 'darwin':
+ path = os.path.join(
+ os.path.expanduser('/Library/Application Support'),
+ appname)
+ else:
+ # XDG default for $XDG_CONFIG_DIRS[0]. Perhaps should actually
+ # *use* that envvar, if defined.
+ path = "/etc/xdg/"+appname.lower()
+ if version:
+ path = os.path.join(path, version)
+ return path
+
+
+def user_cache_dir(appname, appauthor=None, version=None, opinion=True):
+ r"""Return full path to the user-specific cache dir for this application.
+
+ "appname" is the name of application.
+ "appauthor" (only required and used on Windows) is the name of the
+ appauthor or distributing body for this application. Typically
+ it is the owning company name.
+ "version" is an optional version path element to append to the
+ path. You might want to use this if you want multiple versions
+ of your app to be able to run independently. If used, this
+ would typically be "<major>.<minor>".
+ "opinion" (boolean) can be False to disable the appending of
+ "Cache" to the base app data dir for Windows. See
+ discussion below.
+
+ Typical user cache directories are:
+ Mac OS X: ~/Library/Caches/<AppName>
+ Unix: ~/.cache/<appname> (XDG default)
+ Win XP: C:\Documents and Settings\<username>\Local Settings\Application Data\<AppAuthor>\<AppName>\Cache
+ Vista: C:\Users\<username>\AppData\Local\<AppAuthor>\<AppName>\Cache
+
+ On Windows the only suggestion in the MSDN docs is that local settings go in
+ the `CSIDL_LOCAL_APPDATA` directory. This is identical to the non-roaming
+ app data dir (the default returned by `user_data_dir` above). Apps typically
+ put cache data somewhere *under* the given dir here. Some examples:
+ ...\Mozilla\Firefox\Profiles\<ProfileName>\Cache
+ ...\Acme\SuperApp\Cache\1.0
+ OPINION: This function appends "Cache" to the `CSIDL_LOCAL_APPDATA` value.
+ This can be disabled with the `opinion=False` option.
+ """
+ if sys.platform.startswith("win"):
+ if appauthor is None:
+ raise AppDirsError("must specify 'appauthor' on Windows")
+ path = os.path.join(_get_win_folder("CSIDL_LOCAL_APPDATA"),
+ appauthor, appname)
+ if opinion:
+ path = os.path.join(path, "Cache")
+ elif sys.platform == 'darwin':
+ path = os.path.join(
+ os.path.expanduser('~/Library/Caches'),
+ appname)
+ else:
+ path = os.path.join(
+ os.getenv('XDG_CACHE_HOME', os.path.expanduser('~/.cache')),
+ appname.lower())
+ if version:
+ path = os.path.join(path, version)
+ return path
+
+def user_log_dir(appname, appauthor=None, version=None, opinion=True):
+ r"""Return full path to the user-specific log dir for this application.
+
+ "appname" is the name of application.
+ "appauthor" (only required and used on Windows) is the name of the
+ appauthor or distributing body for this application. Typically
+ it is the owning company name.
+ "version" is an optional version path element to append to the
+ path. You might want to use this if you want multiple versions
+ of your app to be able to run independently. If used, this
+ would typically be "<major>.<minor>".
+ "opinion" (boolean) can be False to disable the appending of
+ "Logs" to the base app data dir for Windows, and "log" to the
+ base cache dir for Unix. See discussion below.
+
+ Typical user log directories are:
+ Mac OS X: ~/Library/Logs/<AppName>
+ Unix: ~/.cache/<appname>/log # or under $XDG_CACHE_HOME if defined
+ Win XP: C:\Documents and Settings\<username>\Local Settings\Application Data\<AppAuthor>\<AppName>\Logs
+ Vista: C:\Users\<username>\AppData\Local\<AppAuthor>\<AppName>\Logs
+
+ On Windows the only suggestion in the MSDN docs is that local settings
+ go in the `CSIDL_LOCAL_APPDATA` directory. (Note: I'm interested in
+ examples of what some windows apps use for a logs dir.)
+
+ OPINION: This function appends "Logs" to the `CSIDL_LOCAL_APPDATA`
+ value for Windows and appends "log" to the user cache dir for Unix.
+ This can be disabled with the `opinion=False` option.
+ """
+ if sys.platform == "darwin":
+ path = os.path.join(
+ os.path.expanduser('~/Library/Logs'),
+ appname)
+ elif sys.platform == "win32":
+ path = user_data_dir(appname, appauthor, version); version=False
+ if opinion:
+ path = os.path.join(path, "Logs")
+ else:
+ path = user_cache_dir(appname, appauthor, version); version=False
+ if opinion:
+ path = os.path.join(path, "log")
+ if version:
+ path = os.path.join(path, version)
+ return path
+
+
+class AppDirs(object):
+ """Convenience wrapper for getting application dirs."""
+ def __init__(self, appname, appauthor, version=None, roaming=False):
+ self.appname = appname
+ self.appauthor = appauthor
+ self.version = version
+ self.roaming = roaming
+ @property
+ def user_data_dir(self):
+ return user_data_dir(self.appname, self.appauthor,
+ version=self.version, roaming=self.roaming)
+ @property
+ def site_data_dir(self):
+ return site_data_dir(self.appname, self.appauthor,
+ version=self.version)
+ @property
+ def user_cache_dir(self):
+ return user_cache_dir(self.appname, self.appauthor,
+ version=self.version)
+ @property
+ def user_log_dir(self):
+ return user_log_dir(self.appname, self.appauthor,
+ version=self.version)
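+
+# Minimal usage sketch (paths vary by platform; see the docstrings above
+# and the self-test at the bottom of this file):
+#   >>> dirs = AppDirs("MyApp", "MyCompany")
+#   >>> dirs.user_cache_dir # e.g. ~/Library/Caches/MyApp on Mac OS X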
+
+
+
+
+#---- internal support stuff
+
+def _get_win_folder_from_registry(csidl_name):
+ """This is a fallback technique at best. I'm not sure if using the
+ registry for this guarantees us the correct answer for all CSIDL_*
+ names.
+ """
+ import _winreg
+
+ shell_folder_name = {
+ "CSIDL_APPDATA": "AppData",
+ "CSIDL_COMMON_APPDATA": "Common AppData",
+ "CSIDL_LOCAL_APPDATA": "Local AppData",
+ }[csidl_name]
+
+ key = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
+ r"Software\Microsoft\Windows\CurrentVersion\Explorer\Shell Folders")
+ dir, type = _winreg.QueryValueEx(key, shell_folder_name)
+ return dir
+
+def _get_win_folder_with_pywin32(csidl_name):
+ from win32com.shell import shellcon, shell
+ dir = shell.SHGetFolderPath(0, getattr(shellcon, csidl_name), 0, 0)
+ # Try to make this a unicode path because SHGetFolderPath does
+ # not return unicode strings when there is unicode data in the
+ # path.
+ try:
+ dir = unicode(dir)
+
+ # Downgrade to short path name if have highbit chars. See
+ # <http://bugs.activestate.com/show_bug.cgi?id=85099>.
+ has_high_char = False
+ for c in dir:
+ if ord(c) > 255:
+ has_high_char = True
+ break
+ if has_high_char:
+ try:
+ import win32api
+ dir = win32api.GetShortPathName(dir)
+ except ImportError:
+ pass
+ except UnicodeError:
+ pass
+ return dir
+
+def _get_win_folder_with_ctypes(csidl_name):
+ import ctypes
+
+ csidl_const = {
+ "CSIDL_APPDATA": 26,
+ "CSIDL_COMMON_APPDATA": 35,
+ "CSIDL_LOCAL_APPDATA": 28,
+ }[csidl_name]
+
+ buf = ctypes.create_unicode_buffer(1024)
+ ctypes.windll.shell32.SHGetFolderPathW(None, csidl_const, None, 0, buf)
+
+ # Downgrade to short path name if have highbit chars. See
+ # <http://bugs.activestate.com/show_bug.cgi?id=85099>.
+ has_high_char = False
+ for c in buf:
+ if ord(c) > 255:
+ has_high_char = True
+ break
+ if has_high_char:
+ buf2 = ctypes.create_unicode_buffer(1024)
+ if ctypes.windll.kernel32.GetShortPathNameW(buf.value, buf2, 1024):
+ buf = buf2
+
+ return buf.value
+
+if sys.platform == "win32":
+ try:
+ import win32com.shell
+ _get_win_folder = _get_win_folder_with_pywin32
+ except ImportError:
+ try:
+ import ctypes
+ _get_win_folder = _get_win_folder_with_ctypes
+ except ImportError:
+ _get_win_folder = _get_win_folder_from_registry
+
+
+
+#---- self test code
+
+if __name__ == "__main__":
+ appname = "MyApp"
+ appauthor = "MyCompany"
+
+ props = ("user_data_dir", "site_data_dir", "user_cache_dir",
+ "user_log_dir")
+
+ print("-- app dirs (without optional 'version')")
+ dirs = AppDirs(appname, appauthor, version="1.0")
+ for prop in props:
+ print("%s: %s" % (prop, getattr(dirs, prop)))
+
+ print("\n-- app dirs (with optional 'version')")
+ dirs = AppDirs(appname, appauthor)
+ for prop in props:
+ print("%s: %s" % (prop, getattr(dirs, prop)))
BIN deps/appdirs.pyc
Binary file not shown.
2,299 deps/markdown2.py
@@ -0,0 +1,2299 @@
+#!/usr/bin/env python
+# Copyright (c) 2012 Trent Mick.
+# Copyright (c) 2007-2008 ActiveState Corp.
+# License: MIT (http://www.opensource.org/licenses/mit-license.php)
+
+from __future__ import generators
+
+r"""A fast and complete Python implementation of Markdown.
+
+[from http://daringfireball.net/projects/markdown/]
+> Markdown is a text-to-HTML filter; it translates an easy-to-read /
+> easy-to-write structured text format into HTML. Markdown's text
+> format is most similar to that of plain text email, and supports
+> features such as headers, *emphasis*, code blocks, blockquotes, and
+> links.
+>
+> Markdown's syntax is designed not as a generic markup language, but
+> specifically to serve as a front-end to (X)HTML. You can use span-level
+> HTML tags anywhere in a Markdown document, and you can use block level
+> HTML tags (like <div> and <table> as well).
+
+Module usage:
+
+ >>> import markdown2
+ >>> markdown2.markdown("*boo!*") # or use `html = markdown_path(PATH)`
+ u'<p><em>boo!</em></p>\n'
+
+ >>> markdowner = Markdown()
+ >>> markdowner.convert("*boo!*")
+ u'<p><em>boo!</em></p>\n'
+ >>> markdowner.convert("**boom!**")
+ u'<p><strong>boom!</strong></p>\n'
+
+This implementation of Markdown implements the full "core" syntax plus a
+number of extras (e.g., code syntax coloring, footnotes) as described on
+<https://github.com/trentm/python-markdown2/wiki/Extras>.
+"""
+
+cmdln_desc = """A fast and complete Python implementation of Markdown, a
+text-to-HTML conversion tool for web writers.
+
+Supported extra syntax options (see -x|--extras option below and
+see <https://github.com/trentm/python-markdown2/wiki/Extras> for details):
+
+* code-friendly: Disable _ and __ for em and strong.
+* cuddled-lists: Allow lists to be cuddled to the preceding paragraph.
+* fenced-code-blocks: Allows a code block to not have to be indented
+ by fencing it with '```' on a line before and after. Based on
+ <http://github.github.com/github-flavored-markdown/> with support for
+ syntax highlighting.
+* footnotes: Support footnotes as in use on daringfireball.net and
+ implemented in other Markdown processors (tho not in Markdown.pl v1.0.1).
+* header-ids: Adds "id" attributes to headers. The id value is a slug of
+ the header text.
+* html-classes: Takes a dict mapping html tag names (lowercase) to a
+ string to use for a "class" tag attribute. Currently only supports
+ "pre" and "code" tags. Add an issue if you require this for other tags.
+* markdown-in-html: Allow the use of `markdown="1"` in a block HTML tag to
+ have markdown processing be done on its contents. Similar to
+ <http://michelf.com/projects/php-markdown/extra/#markdown-attr> but with
+ some limitations.
+* metadata: Extract metadata from a leading '---'-fenced block.
+ See <https://github.com/trentm/python-markdown2/issues/77> for details.
+* pyshell: Treats unindented Python interactive shell sessions as <code>
+ blocks.
+* link-patterns: Auto-link given regex patterns in text (e.g. bug number
+ references, revision number references).
+* smarty-pants: Replaces ' and " with curly quotation marks or curly
+ apostrophes. Replaces --, ---, ..., and . . . with en dashes, em dashes,
+ and ellipses.
+* toc: The returned HTML string gets a new "toc_html" attribute which is
+ a Table of Contents for the document. (experimental)
+* xml: Passes one-liner processing instructions and namespaced XML tags.
+* wiki-tables: Google Code Wiki-style tables. See
+ <http://code.google.com/p/support/wiki/WikiSyntax#Tables>.
+"""
+
+# Dev Notes:
+# - Python's regex syntax doesn't have '\z', so I'm using '\Z'. I'm
+# not yet sure if there are implications with this. Compare 'pydoc sre'
+# and 'perldoc perlre'.
+
+__version_info__ = (1, 4, 3)
+__version__ = '.'.join(map(str, __version_info__))
+__author__ = "Trent Mick"
+
+import os
+import sys
+from pprint import pprint
+import re
+import logging
+try:
+ from hashlib import md5
+except ImportError:
+ from md5 import md5
+import optparse
+from random import random, randint
+import codecs
+
+
+#---- Python version compat
+
+try:
+ from urllib.parse import quote # python3
+except ImportError:
+ from urllib import quote # python2
+
+if sys.version_info[:2] < (2,4):
+ from sets import Set as set
+ def reversed(sequence):
+ for i in sequence[::-1]:
+ yield i
+
+# Use `bytes` for byte strings and `unicode` for unicode strings (str in Py3).
+if sys.version_info[0] <= 2:
+ py3 = False
+ try:
+ bytes
+ except NameError:
+ bytes = str
+ base_string_type = basestring
+elif sys.version_info[0] >= 3:
+ py3 = True
+ unicode = str
+ base_string_type = str
+
+
+
+#---- globals
+
+DEBUG = False
+log = logging.getLogger("markdown")
+
+DEFAULT_TAB_WIDTH = 4
+
+
+SECRET_SALT = bytes(randint(0, 1000000))
+def _hash_text(s):
+ return 'md5-' + md5(SECRET_SALT + s.encode("utf-8")).hexdigest()
+
+# Table of hash values for escaped characters:
+g_escape_table = dict([(ch, _hash_text(ch))
+ for ch in '\\`*_{}[]()>#+-.!'])
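+# For example, g_escape_table['*'] is a stable token ('md5-...') that
+# stands in for a literal '*' during processing and is swapped back by
+# _unescape_special_chars() at the end of convert().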
+
+
+
+#---- exceptions
+
+class MarkdownError(Exception):
+ pass
+
+
+
+#---- public api
+
+def markdown_path(path, encoding="utf-8",
+ html4tags=False, tab_width=DEFAULT_TAB_WIDTH,
+ safe_mode=None, extras=None, link_patterns=None,
+ use_file_vars=False):
+ fp = codecs.open(path, 'r', encoding)
+ text = fp.read()
+ fp.close()
+ return Markdown(html4tags=html4tags, tab_width=tab_width,
+ safe_mode=safe_mode, extras=extras,
+ link_patterns=link_patterns,
+ use_file_vars=use_file_vars).convert(text)
+
+def markdown(text, html4tags=False, tab_width=DEFAULT_TAB_WIDTH,
+ safe_mode=None, extras=None, link_patterns=None,
+ use_file_vars=False):
+ return Markdown(html4tags=html4tags, tab_width=tab_width,
+ safe_mode=safe_mode, extras=extras,
+ link_patterns=link_patterns,
+ use_file_vars=use_file_vars).convert(text)
+
+class Markdown(object):
+ # The dict of "extras" to enable in processing -- a mapping of
+ # extra name to argument for the extra. Most extras do not have an
+ # argument, in which case the value is None.
+ #
+ # This can be set via (a) subclassing and (b) the constructor
+ # "extras" argument.
+ extras = None
+
+ urls = None
+ titles = None
+ html_blocks = None
+ html_spans = None
+ html_removed_text = "[HTML_REMOVED]" # for compat with markdown.py
+
+ # Used to track when we're inside an ordered or unordered list
+ # (see _ProcessListItems() for details):
+ list_level = 0
+
+ _ws_only_line_re = re.compile(r"^[ \t]+$", re.M)
+
+ def __init__(self, html4tags=False, tab_width=4, safe_mode=None,
+ extras=None, link_patterns=None, use_file_vars=False):
+ if html4tags:
+ self.empty_element_suffix = ">"
+ else:
+ self.empty_element_suffix = " />"
+ self.tab_width = tab_width
+
+ # For compatibility with earlier markdown2.py and with
+ # markdown.py's safe_mode being a boolean,
+ # safe_mode == True -> "replace"
+ if safe_mode is True:
+ self.safe_mode = "replace"
+ else:
+ self.safe_mode = safe_mode
+
+ # Massaging and building the "extras" info.
+ if self.extras is None:
+ self.extras = {}
+ elif not isinstance(self.extras, dict):
+ self.extras = dict([(e, None) for e in self.extras])
+ if extras:
+ if not isinstance(extras, dict):
+ extras = dict([(e, None) for e in extras])
+ self.extras.update(extras)
+ assert isinstance(self.extras, dict)
+ if "toc" in self.extras and not "header-ids" in self.extras:
+ self.extras["header-ids"] = None # "toc" implies "header-ids"
+ self._instance_extras = self.extras.copy()
+
+ self.link_patterns = link_patterns
+ self.use_file_vars = use_file_vars
+ self._outdent_re = re.compile(r'^(\t|[ ]{1,%d})' % tab_width, re.M)
+
+ self._escape_table = g_escape_table.copy()
+ if "smarty-pants" in self.extras:
+ self._escape_table['"'] = _hash_text('"')
+ self._escape_table["'"] = _hash_text("'")
+
+ def reset(self):
+ self.urls = {}
+ self.titles = {}
+ self.html_blocks = {}
+ self.html_spans = {}
+ self.list_level = 0
+ self.extras = self._instance_extras.copy()
+ if "footnotes" in self.extras:
+ self.footnotes = {}
+ self.footnote_ids = []
+ if "header-ids" in self.extras:
+ self._count_from_header_id = {} # no `defaultdict` in Python 2.4
+ if "metadata" in self.extras:
+ self.metadata = {}
+
+ def convert(self, text):
+ """Convert the given text."""
+ # Main function. The order in which other subs are called here is
+ # essential. Link and image substitutions need to happen before
+ # _EscapeSpecialChars(), so that any *'s or _'s in the <a>
+ # and <img> tags get encoded.
+
+ # Clear the global hashes. If we don't clear these, you get conflicts
+ # from other articles when generating a page which contains more than
+ # one article (e.g. an index page that shows the N most recent
+ # articles):
+ self.reset()
+
+ if not isinstance(text, unicode):
+ #TODO: perhaps shouldn't presume UTF-8 for string input?
+ text = unicode(text, 'utf-8')
+
+ if self.use_file_vars:
+ # Look for emacs-style file variable hints.
+ emacs_vars = self._get_emacs_vars(text)
+ if "markdown-extras" in emacs_vars:
+ splitter = re.compile("[ ,]+")
+ for e in splitter.split(emacs_vars["markdown-extras"]):
+ if '=' in e:
+ ename, earg = e.split('=', 1)
+ try:
+ earg = int(earg)
+ except ValueError:
+ pass
+ else:
+ ename, earg = e, None
+ self.extras[ename] = earg
+
+ # Standardize line endings:
+ text = re.sub("\r\n|\r", "\n", text)
+
+ # Make sure $text ends with a couple of newlines:
+ text += "\n\n"
+
+ # Convert all tabs to spaces.
+ text = self._detab(text)
+
+ # Strip any lines consisting only of spaces and tabs.
+ # This makes subsequent regexen easier to write, because we can
+ # match consecutive blank lines with /\n+/ instead of something
+ # contorted like /[ \t]*\n+/ .
+ text = self._ws_only_line_re.sub("", text)
+
+ # strip metadata from head and extract
+ if "metadata" in self.extras:
+ text = self._extract_metadata(text)
+
+ if self.safe_mode:
+ text = self._hash_html_spans(text)
+
+ # Turn block-level HTML blocks into hash entries
+ text = self._hash_html_blocks(text, raw=True)
+
+ # Strip link definitions, store in hashes.
+ if "footnotes" in self.extras:
+ # Must do footnotes first because an unlucky footnote defn
+ # looks like a link defn:
+ # [^4]: this "looks like a link defn"
+ text = self._strip_footnote_definitions(text)
+ text = self._strip_link_definitions(text)
+
+ text = self._run_block_gamut(text)
+
+ if "footnotes" in self.extras:
+ text = self._add_footnotes(text)
+
+ text = self.postprocess(text)
+
+ text = self._unescape_special_chars(text)
+
+ if self.safe_mode:
+ text = self._unhash_html_spans(text)
+
+ text += "\n"
+
+ rv = UnicodeWithAttrs(text)
+ if "toc" in self.extras:
+ rv._toc = self._toc
+ if "metadata" in self.extras:
+ rv.metadata = self.metadata
+ return rv
+
+ def postprocess(self, text):
+ """A hook for subclasses to do some postprocessing of the html, if
+ desired. This is called before unescaping of special chars and
+ unhashing of raw HTML spans.
+ """
+ return text
+
+ # The content has metadata if it starts with '---'-fenced `key: value`
+ # pairs. E.g. (indented for presentation):
+ # ---
+ # foo: bar
+ # another-var: blah blah
+ # ---
+ _metadata_pat = re.compile("""^---[ \t]*\n((?:[ \t]*[^ \t:]+[ \t]*:[^\n]*\n)+)---[ \t]*\n""")
+
+ def _extract_metadata(self, text):
+ # fast test
+ if not text.startswith("---"):
+ return text
+ match = self._metadata_pat.match(text)
+ if not match:
+ return text
+
+ tail = text[len(match.group(0)):]
+ metadata_str = match.group(1).strip()
+ for line in metadata_str.split('\n'):
+ key, value = line.split(':', 1)
+ self.metadata[key.strip()] = value.strip()
+
+ return tail
+
+
+ _emacs_oneliner_vars_pat = re.compile(r"-\*-\s*([^\r\n]*?)\s*-\*-", re.UNICODE)
+ # This regular expression is intended to match blocks like this:
+ # PREFIX Local Variables: SUFFIX
+ # PREFIX mode: Tcl SUFFIX
+ # PREFIX End: SUFFIX
+ # Some notes:
+ # - "[ \t]" is used instead of "\s" to specifically exclude newlines
+ # - "(\r\n|\n|\r)" is used instead of "$" because the sre engine does
+ # not like anything other than Unix-style line terminators.
+ _emacs_local_vars_pat = re.compile(r"""^
+ (?P<prefix>(?:[^\r\n|\n|\r])*?)
+ [\ \t]*Local\ Variables:[\ \t]*
+ (?P<suffix>.*?)(?:\r\n|\n|\r)
+ (?P<content>.*?\1End:)
+ """, re.IGNORECASE | re.MULTILINE | re.DOTALL | re.VERBOSE)
+
+ def _get_emacs_vars(self, text):
+ """Return a dictionary of emacs-style local variables.
+
+ Parsing is done loosely according to this spec (and according to
+ some in-practice deviations from this):
+ http://www.gnu.org/software/emacs/manual/html_node/emacs/Specifying-File-Variables.html#Specifying-File-Variables
+ """
+ emacs_vars = {}
+ SIZE = pow(2, 13) # 8kB
+
+ # Search near the start for a '-*-'-style one-liner of variables.
+ head = text[:SIZE]
+ if "-*-" in head:
+ match = self._emacs_oneliner_vars_pat.search(head)
+ if match:
+ emacs_vars_str = match.group(1)
+ assert '\n' not in emacs_vars_str
+ emacs_var_strs = [s.strip() for s in emacs_vars_str.split(';')
+ if s.strip()]
+ if len(emacs_var_strs) == 1 and ':' not in emacs_var_strs[0]:
+ # While not in the spec, this form is allowed by emacs:
+ # -*- Tcl -*-
+ # where the implied "variable" is "mode". This form
+ # is only allowed if there are no other variables.
+ emacs_vars["mode"] = emacs_var_strs[0].strip()
+ else:
+ for emacs_var_str in emacs_var_strs:
+ try:
+ variable, value = emacs_var_str.strip().split(':', 1)
+ except ValueError:
+ log.debug("emacs variables error: malformed -*- "
+ "line: %r", emacs_var_str)
+ continue
+ # Lowercase the variable name because Emacs allows "Mode"
+ # or "mode" or "MoDe", etc.
+ emacs_vars[variable.lower()] = value.strip()
+
+ tail = text[-SIZE:]
+ if "Local Variables" in tail:
+ match = self._emacs_local_vars_pat.search(tail)
+ if match:
+ prefix = match.group("prefix")
+ suffix = match.group("suffix")
+ lines = match.group("content").splitlines(0)
+ #print "prefix=%r, suffix=%r, content=%r, lines: %s"\
+ # % (prefix, suffix, match.group("content"), lines)
+
+ # Validate the Local Variables block: proper prefix and suffix
+ # usage.
+ for i, line in enumerate(lines):
+ if not line.startswith(prefix):
+ log.debug("emacs variables error: line '%s' "
+ "does not use proper prefix '%s'"
+ % (line, prefix))
+ return {}
+ # Don't validate suffix on last line. Emacs doesn't care,
+ # neither should we.
+ if i != len(lines)-1 and not line.endswith(suffix):
+ log.debug("emacs variables error: line '%s' "
+ "does not use proper suffix '%s'"
+ % (line, suffix))
+ return {}
+
+ # Parse out one emacs var per line.
+ continued_for = None
+ for line in lines[:-1]: # no var on the last line ("PREFIX End:")
+ if prefix: line = line[len(prefix):] # strip prefix
+ if suffix: line = line[:-len(suffix)] # strip suffix
+ line = line.strip()
+ if continued_for:
+ variable = continued_for
+ if line.endswith('\\'):
+ line = line[:-1].rstrip()
+ else:
+ continued_for = None
+ emacs_vars[variable] += ' ' + line
+ else:
+ try:
+ variable, value = line.split(':', 1)
+ except ValueError:
+ log.debug("local variables error: missing colon "
+ "in local variables entry: '%s'" % line)
+ continue
+ # Do NOT lowercase the variable name, because Emacs only
+ # allows "mode" (and not "Mode", "MoDe", etc.) in this block.
+ value = value.strip()
+ if value.endswith('\\'):
+ value = value[:-1].rstrip()
+ continued_for = variable
+ else:
+ continued_for = None
+ emacs_vars[variable] = value
+
+ # Unquote values.
+ for var, val in list(emacs_vars.items()):
+ if len(val) > 1 and (val.startswith('"') and val.endswith('"')
+ or val.startswith("'") and val.endswith("'")):
+ emacs_vars[var] = val[1:-1]
+
+ return emacs_vars
+
+ # Cribbed from a post by Bart Lateur:
+ # <http://www.nntp.perl.org/group/perl.macperl.anyperl/154>
+ _detab_re = re.compile(r'(.*?)\t', re.M)
+ def _detab_sub(self, match):
+ g1 = match.group(1)
+ return g1 + (' ' * (self.tab_width - len(g1) % self.tab_width))
+ def _detab(self, text):
+ r"""Remove (leading?) tabs from a file.
+
+ >>> m = Markdown()
+ >>> m._detab("\tfoo")
+ ' foo'
+ >>> m._detab(" \tfoo")
+ ' foo'
+ >>> m._detab("\t foo")
+ ' foo'
+ >>> m._detab(" foo")
+ ' foo'
+ >>> m._detab(" foo\n\tbar\tblam")
+ ' foo\n bar blam'
+ """
+ if '\t' not in text:
+ return text
+ return self._detab_re.subn(self._detab_sub, text)[0]
+
+ # I broke out the html5 tags here and add them to _block_tags_a and
+ # _block_tags_b. This way html5 tags are easy to keep track of.
+ _html5tags = '|article|aside|header|hgroup|footer|nav|section|figure|figcaption'
+
+ _block_tags_a = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del'
+ _block_tags_a += _html5tags
+
+ _strict_tag_block_re = re.compile(r"""
+ ( # save in \1
+ ^ # start of line (with re.M)
+ <(%s) # start tag = \2
+ \b # word break
+ (.*\n)*? # any number of lines, minimally matching
+ </\2> # the matching end tag
+ [ \t]* # trailing spaces/tabs
+ (?=\n+|\Z) # followed by a newline or end of document
+ )
+ """ % _block_tags_a,
+ re.X | re.M)
+
+ _block_tags_b = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math'
+ _block_tags_b += _html5tags
+
+ _liberal_tag_block_re = re.compile(r"""
+ ( # save in \1
+ ^ # start of line (with re.M)
+ <(%s) # start tag = \2
+ \b # word break
+ (.*\n)*? # any number of lines, minimally matching
+ .*</\2> # the matching end tag
+ [ \t]* # trailing spaces/tabs
+ (?=\n+|\Z) # followed by a newline or end of document
+ )
+ """ % _block_tags_b,
+ re.X | re.M)
+
+ _html_markdown_attr_re = re.compile(
+ r'''\s+markdown=("1"|'1')''')
+ def _hash_html_block_sub(self, match, raw=False):
+ html = match.group(1)
+ if raw and self.safe_mode:
+ html = self._sanitize_html(html)
+ elif 'markdown-in-html' in self.extras and 'markdown=' in html:
+ first_line = html.split('\n', 1)[0]
+ m = self._html_markdown_attr_re.search(first_line)
+ if m:
+ lines = html.split('\n')
+ middle = '\n'.join(lines[1:-1])
+ last_line = lines[-1]
+ first_line = first_line[:m.start()] + first_line[m.end():]
+ f_key = _hash_text(first_line)
+ self.html_blocks[f_key] = first_line
+ l_key = _hash_text(last_line)
+ self.html_blocks[l_key] = last_line
+ return ''.join(["\n\n", f_key,
+ "\n\n", middle, "\n\n",
+ l_key, "\n\n"])
+ key = _hash_text(html)
+ self.html_blocks[key] = html
+ return "\n\n" + key + "\n\n"
+
+ def _hash_html_blocks(self, text, raw=False):
+ """Hashify HTML blocks
+
+ We only want to do this for block-level HTML tags, such as headers,
+ lists, and tables. That's because we still want to wrap <p>s around
+ "paragraphs" that are wrapped in non-block-level tags, such as anchors,
+ phrase emphasis, and spans. The list of tags we're looking for is
+ hard-coded.
+
+ @param raw {boolean} indicates if these are raw HTML blocks in
+ the original source. It makes a difference in "safe" mode.
+ """
+ if '<' not in text:
+ return text
+
+ # Pass `raw` value into our calls to self._hash_html_block_sub.
+ hash_html_block_sub = _curry(self._hash_html_block_sub, raw=raw)
+
+ # First, look for nested blocks, e.g.:
+ # <div>
+ # <div>
+ # tags for inner block must be indented.
+ # </div>
+ # </div>
+ #
+ # The outermost tags must start at the left margin for this to match, and
+ # the inner nested divs must be indented.
+ # We need to do this before the next, more liberal match, because the next
+ # match will start at the first `<div>` and stop at the first `</div>`.
+ text = self._strict_tag_block_re.sub(hash_html_block_sub, text)
+
+ # Now match more liberally, simply from `\n<tag>` to `</tag>\n`
+ text = self._liberal_tag_block_re.sub(hash_html_block_sub, text)
+
+ # Special case just for <hr />. It was easier to make a special
+ # case than to make the other regex more complicated.
+ if "<hr" in text:
+ _hr_tag_re = _hr_tag_re_from_tab_width(self.tab_width)
+ text = _hr_tag_re.sub(hash_html_block_sub, text)
+
+ # Special case for standalone HTML comments:
+ if "<!--" in text:
+ start = 0
+ while True:
+ # Delimiters for next comment block.
+ try:
+ start_idx = text.index("<!--", start)
+ except ValueError:
+ break
+ try:
+ end_idx = text.index("-->", start_idx) + 3
+ except ValueError:
+ break
+
+ # Start position for next comment block search.
+ start = end_idx
+
+ # Validate whitespace before comment.
+ if start_idx:
+ # - Up to `tab_width - 1` spaces before start_idx.
+ for i in range(self.tab_width - 1):
+ if text[start_idx - 1] != ' ':
+ break
+ start_idx -= 1
+ if start_idx == 0:
+ break
+ # - Must be preceded by 2 newlines or hit the start of
+ # the document.
+ if start_idx == 0:
+ pass
+ elif start_idx == 1 and text[0] == '\n':
+ start_idx = 0 # to match minute detail of Markdown.pl regex
+ elif text[start_idx-2:start_idx] == '\n\n':
+ pass
+ else:
+ break
+
+ # Validate whitespace after comment.
+ # - Any number of spaces and tabs.
+ while end_idx < len(text):
+ if text[end_idx] not in ' \t':
+ break
+ end_idx += 1
+ # - Must be followed by 2 newlines or hit end of text.
+ if text[end_idx:end_idx+2] not in ('', '\n', '\n\n'):
+ continue
+
+ # Escape and hash (must match `_hash_html_block_sub`).
+ html = text[start_idx:end_idx]
+ if raw and self.safe_mode:
+ html = self._sanitize_html(html)
+ key = _hash_text(html)
+ self.html_blocks[key] = html
+ text = text[:start_idx] + "\n\n" + key + "\n\n" + text[end_idx:]
+
+ if "xml" in self.extras:
+ # Treat XML processing instructions and namespaced one-liner
+ # tags as if they were block HTML tags. E.g., if standalone
+ # (i.e. are their own paragraph), the following do not get
+ # wrapped in a <p> tag:
+ # <?foo bar?>
+ #
+ # <xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="chapter_1.md"/>
+ _xml_oneliner_re = _xml_oneliner_re_from_tab_width(self.tab_width)
+ text = _xml_oneliner_re.sub(hash_html_block_sub, text)
+
+ return text
+
+ def _strip_link_definitions(self, text):
+ # Strips link definitions from text, stores the URLs and titles in
+ # hash references.
+ less_than_tab = self.tab_width - 1
+
+ # Link defs are in the form:
+ # [id]: url "optional title"
+ _link_def_re = re.compile(r"""
+ ^[ ]{0,%d}\[(.+)\]: # id = \1
+ [ \t]*
+ \n? # maybe *one* newline
+ [ \t]*
+ <?(.+?)>? # url = \2
+ [ \t]*
+ (?:
+ \n? # maybe one newline
+ [ \t]*
+ (?<=\s) # lookbehind for whitespace
+ ['"(]
+ ([^\n]*) # title = \3
+ ['")]
+ [ \t]*
+ )? # title is optional
+ (?:\n+|\Z)
+ """ % less_than_tab, re.X | re.M | re.U)
+ return _link_def_re.sub(self._extract_link_def_sub, text)
+
+ def _extract_link_def_sub(self, match):
+ id, url, title = match.groups()
+ key = id.lower() # Link IDs are case-insensitive
+ self.urls[key] = self._encode_amps_and_angles(url)
+ if title:
+ self.titles[key] = title
+ return ""
+
+ def _extract_footnote_def_sub(self, match):
+ id, text = match.groups()
+ text = _dedent(text, skip_first_line=not text.startswith('\n')).strip()
+ normed_id = re.sub(r'\W', '-', id)
+ # Ensure footnote text ends with a couple newlines (for some
+ # block gamut matches).
+ self.footnotes[normed_id] = text + "\n\n"
+ return ""
+
+ def _strip_footnote_definitions(self, text):
+ """A footnote definition looks like this:
+
+ [^note-id]: Text of the note.
+
+ May include one or more indented paragraphs.
+
+ Where,
+ - The 'note-id' can be pretty much anything, though typically it
+ is the number of the footnote.
+ - The first paragraph may start on the next line, like so:
+
+ [^note-id]:
+ Text of the note.
+ """
+ less_than_tab = self.tab_width - 1
+ footnote_def_re = re.compile(r'''
+ ^[ ]{0,%d}\[\^(.+)\]: # id = \1
+ [ \t]*
+ ( # footnote text = \2
+ # First line need not start with the spaces.
+ (?:\s*.*\n+)
+ (?:
+ (?:[ ]{%d} | \t) # Subsequent lines must be indented.
+ .*\n+
+ )*
+ )
+ # Lookahead for non-space at line-start, or end of doc.
+ (?:(?=^[ ]{0,%d}\S)|\Z)
+ ''' % (less_than_tab, self.tab_width, self.tab_width),
+ re.X | re.M)
+ return footnote_def_re.sub(self._extract_footnote_def_sub, text)
+
+
+ _hr_data = [
+ ('*', re.compile(r"^[ ]{0,3}\*(.*?)$", re.M)),
+ ('-', re.compile(r"^[ ]{0,3}\-(.*?)$", re.M)),
+ ('_', re.compile(r"^[ ]{0,3}\_(.*?)$", re.M)),
+ ]
+
+ def _run_block_gamut(self, text):
+ # These are all the transformations that form block-level
+ # tags like paragraphs, headers, and list items.
+
+ if "fenced-code-blocks" in self.extras:
+ text = self._do_fenced_code_blocks(text)
+
+ text = self._do_headers(text)
+
+ # Do Horizontal Rules:
+ # On the number of spaces in horizontal rules: The spec is fuzzy: "If
+ # you wish, you may use spaces between the hyphens or asterisks."
+ # Markdown.pl 1.0.1's hr regexes limit the number of spaces between the
+ # hr chars to one or two. We'll reproduce that limit here.
+ hr = "\n<hr"+self.empty_element_suffix+"\n"
+ for ch, regex in self._hr_data:
+ if ch in text:
+ for m in reversed(list(regex.finditer(text))):
+ tail = m.group(1).rstrip()
+ if not tail.strip(ch + ' ') and tail.count("   ") == 0:
+ start, end = m.span()
+ text = text[:start] + hr + text[end:]
+
+ text = self._do_lists(text)
+
+ if "pyshell" in self.extras:
+ text = self._prepare_pyshell_blocks(text)
+ if "wiki-tables" in self.extras:
+ text = self._do_wiki_tables(text)
+
+ text = self._do_code_blocks(text)
+
+ text = self._do_block_quotes(text)
+
+ # We already ran _HashHTMLBlocks() before, in Markdown(), but that
+ # was to escape raw HTML in the original Markdown source. This time,
+ # we're escaping the markup we've just created, so that we don't wrap
+ # <p> tags around block-level tags.
+ text = self._hash_html_blocks(text)
+
+ text = self._form_paragraphs(text)
+
+ return text
+
+ def _pyshell_block_sub(self, match):
+ lines = match.group(0).splitlines(0)
+ _dedentlines(lines)
+ indent = ' ' * self.tab_width
+ s = ('\n' # separate from possible cuddled paragraph
+ + indent + ('\n'+indent).join(lines)
+ + '\n\n')
+ return s
+
+ def _prepare_pyshell_blocks(self, text):
+ """Ensure that Python interactive shell sessions are put in
+ code blocks -- even if not properly indented.
+ """
+ if ">>>" not in text:
+ return text
+
+ less_than_tab = self.tab_width - 1
+ _pyshell_block_re = re.compile(r"""
+ ^([ ]{0,%d})>>>[ ].*\n # first line
+ ^(\1.*\S+.*\n)* # any number of subsequent lines
+ ^\n # ends with a blank line
+ """ % less_than_tab, re.M | re.X)
+
+ return _pyshell_block_re.sub(self._pyshell_block_sub, text)
+
+ def _wiki_table_sub(self, match):
+ ttext = match.group(0).strip()
+ #print 'wiki table: %r' % match.group(0)
+ rows = []
+ for line in ttext.splitlines(0):
+ line = line.strip()[2:-2].strip()
+ row = [c.strip() for c in re.split(r'(?<!\\)\|\|', line)]
+ rows.append(row)
+ #pprint(rows)
+ hlines = ['<table>', '<tbody>']
+ for row in rows:
+ hrow = ['<tr>']
+ for cell in row:
+ hrow.append('<td>')
+ hrow.append(self._run_span_gamut(cell))
+ hrow.append('</td>')
+ hrow.append('</tr>')
+ hlines.append(''.join(hrow))
+ hlines += ['</tbody>', '</table>']
+ return '\n'.join(hlines) + '\n'
+
+ def _do_wiki_tables(self, text):
+ # Optimization.
+ if "||" not in text:
+ return text
+
+ less_than_tab = self.tab_width - 1
+ wiki_table_re = re.compile(r'''
+ (?:(?<=\n\n)|\A\n?) # leading blank line
+ ^([ ]{0,%d})\|\|.+?\|\|[ ]*\n # first line
+ (^\1\|\|.+?\|\|\n)* # any number of subsequent lines
+ ''' % less_than_tab, re.M | re.X)
+ return wiki_table_re.sub(self._wiki_table_sub, text)
+
+ def _run_span_gamut(self, text):
+ # These are all the transformations that occur *within* block-level
+ # tags like paragraphs, headers, and list items.
+
+ text = self._do_code_spans(text)
+
+ text = self._escape_special_chars(text)
+
+ # Process anchor and image tags.
+ text = self._do_links(text)
+
+ # Make links out of things like `<http://example.com/>`
+ # Must come after _do_links(), because you can use < and >
+ # delimiters in inline links like [this](<url>).
+ text = self._do_auto_links(text)
+
+ if "link-patterns" in self.extras:
+ text = self._do_link_patterns(text)
+
+ text = self._encode_amps_and_angles(text)
+
+ text = self._do_italics_and_bold(text)
+
+ if "smarty-pants" in self.extras:
+ text = self._do_smart_punctuation(text)
+
+ # Do hard breaks:
+ text = re.sub(r" {2,}\n", " <br%s\n" % self.empty_element_suffix, text)
+
+ return text
+
+ # "Sorta" because auto-links are identified as "tag" tokens.
+ _sorta_html_tokenize_re = re.compile(r"""
+ (
+ # tag
+ </?
+ (?:\w+) # tag name
+ (?:\s+(?:[\w-]+:)?[\w-]+=(?:".*?"|'.*?'))* # attributes
+ \s*/?>
+ |
+ # auto-link (e.g., <http://www.activestate.com/>)
+ <\w+[^>]*>
+ |
+ <!--.*?--> # comment
+ |
+ <\?.*?\?> # processing instruction
+ )
+ """, re.X)
+
+ def _escape_special_chars(self, text):
+ # Python markdown note: the HTML tokenization here differs from
+ # that in Markdown.pl, hence the behaviour for subtle cases can
+ # differ (I believe the tokenizer here does a better job because
+ # it isn't susceptible to unmatched '<' and '>' in HTML tags).
+ # Note, however, that '>' is not allowed in an auto-link URL
+ # here.
+ escaped = []
+ is_html_markup = False
+ for token in self._sorta_html_tokenize_re.split(text):
+ if is_html_markup:
+ # Within tags/HTML-comments/auto-links, encode * and _
+ # so they don't conflict with their use in Markdown for
+ # italics and strong. We're replacing each such
+ # character with its corresponding MD5 checksum value;
+ # this is likely overkill, but it should prevent us from
+ # colliding with the escape values by accident.
+ escaped.append(token.replace('*', self._escape_table['*'])
+ .replace('_', self._escape_table['_']))
+ else:
+ escaped.append(self._encode_backslash_escapes(token))
+ is_html_markup = not is_html_markup
+ return ''.join(escaped)
+
+ def _hash_html_spans(self, text):
+ # Used for safe_mode.
+
+ def _is_auto_link(s):
+ if ':' in s and self._auto_link_re.match(s):
+ return True
+ elif '@' in s and self._auto_email_link_re.match(s):
+ return True
+ return False
+
+ tokens = []
+ is_html_markup = False
+ for token in self._sorta_html_tokenize_re.split(text):
+ if is_html_markup and not _is_auto_link(token):
+ sanitized = self._sanitize_html(token)
+ key = _hash_text(sanitized)
+ self.html_spans[key] = sanitized
+ tokens.append(key)
+ else:
+ tokens.append(token)
+ is_html_markup = not is_html_markup
+ return ''.join(tokens)
+
+ def _unhash_html_spans(self, text):
+ for key, sanitized in list(self.html_spans.items()):
+ text = text.replace(key, sanitized)
+ return text
+
+ def _sanitize_html(self, s):
+ if self.safe_mode == "replace":
+ return self.html_removed_text
+ elif self.safe_mode == "escape":
+ replacements = [
+ ('&', '&amp;'),
+ ('<', '&lt;'),
+ ('>', '&gt;'),
+ ]
+ for before, after in replacements:
+ s = s.replace(before, after)
+ return s
+ else:
+ raise MarkdownError("invalid value for 'safe_mode': %r (must be "
+ "'escape' or 'replace')" % self.safe_mode)
+
+ _tail_of_inline_link_re = re.compile(r'''
+ # Match tail of: [text](/url/) or [text](/url/ "title")
+ \( # literal paren
+ [ \t]*
+ (?P<url> # \1
+ <.*?>
+ |
+ .*?
+ )
+ [ \t]*
+ ( # \2
+ (['"]) # quote char = \3
+ (?P<title>.*?)
+ \3 # matching quote
+ )? # title is optional
+ \)
+ ''', re.X | re.S)
+ _tail_of_reference_link_re = re.compile(r'''
+ # Match tail of: [text][id]
+ [ ]? # one optional space
+ (?:\n[ ]*)? # one optional newline followed by spaces
+ \[
+ (?P<id>.*?)
+ \]
+ ''', re.X | re.S)
+
+ def _do_links(self, text):
+ """Turn Markdown link shortcuts into XHTML <a> and <img> tags.
+
+ This is a combination of Markdown.pl's _DoAnchors() and
+ _DoImages(). They are done together because that simplified the
+ approach. It was necessary to use a different approach than
+ Markdown.pl because of the lack of atomic matching support in
+ Python's regex engine used in $g_nested_brackets.
+ """
+ MAX_LINK_TEXT_SENTINEL = 3000 # markdown2 issue 24
+
+ # `anchor_allowed_pos` is used to support img links inside
+ # anchors, but not anchors inside anchors. An anchor's start
+ # pos must be `>= anchor_allowed_pos`.
+ anchor_allowed_pos = 0
+
+ curr_pos = 0
+ while True: # Handle the next link.
+ # The next '[' is the start of:
+ # - an inline anchor: [text](url "title")
+ # - a reference anchor: [text][id]
+ # - an inline img: ![text](url "title")
+ # - a reference img: ![text][id]
+ # - a footnote ref: [^id]
+ # (Only if 'footnotes' extra enabled)
+ # - a footnote defn: [^id]: ...
+ # (Only if 'footnotes' extra enabled) These have already
+ # been stripped in _strip_footnote_definitions() so no
+ # need to watch for them.
+ # - a link definition: [id]: url "title"
+ # These have already been stripped in
+ # _strip_link_definitions() so no need to watch for them.
+ # - not markup: [...anything else...
+ try:
+ start_idx = text.index('[', curr_pos)
+ except ValueError:
+ break
+ text_length = len(text)
+
+ # Find the matching closing ']'.
+ # Markdown.pl allows *matching* brackets in link text so we
+ # will here too. Markdown.pl *doesn't* currently allow
+ # matching brackets in img alt text -- we'll differ in that
+ # regard.
+ bracket_depth = 0
+ for p in range(start_idx+1, min(start_idx+MAX_LINK_TEXT_SENTINEL,
+ text_length)):
+ ch = text[p]
+ if ch == ']':
+ bracket_depth -= 1
+ if bracket_depth < 0:
+ break
+ elif ch == '[':
+ bracket_depth += 1
+ else:
+ # Closing bracket not found within sentinel length.
+ # This isn't markup.
+ curr_pos = start_idx + 1
+ continue
+ link_text = text[start_idx+1:p]
+
+ # Possibly a footnote ref?
+ if "footnotes" in self.extras and link_text.startswith("^"):
+ normed_id = re.sub(r'\W', '-', link_text[1:])
+ if normed_id in self.footnotes:
+ self.footnote_ids.append(normed_id)
+ result = '<sup class="footnote-ref" id="fnref-%s">' \
+ '<a href="#fn-%s">%s</a></sup>' \
+ % (normed_id, normed_id, len(self.footnote_ids))
+ text = text[:start_idx] + result + text[p+1:]
+ else:
+ # This id isn't defined, leave the markup alone.
+ curr_pos = p+1
+ continue
+
+ # Now determine what this is by the remainder.
+ p += 1
+ if p == text_length:
+ return text
+
+ # Inline anchor or img?
+ if text[p] == '(': # attempt at perf improvement
+ match = self._tail_of_inline_link_re.match(text, p)
+ if match:
+ # Handle an inline anchor or img.
+ is_img = start_idx > 0 and text[start_idx-1] == "!"
+ if is_img:
+ start_idx -= 1
+
+ url, title = match.group("url"), match.group("title")
+ if url and url[0] == '<':
+ url = url[1:-1] # '<url>' -> 'url'
+ # We've got to encode these to avoid conflicting
+ # with italics/bold.
+ url = url.replace('*', self._escape_table['*']) \
+ .replace('_', self._escape_table['_'])
+ if title:
+ title_str = ' title="%s"' % (
+ _xml_escape_attr(title)
+ .replace('*', self._escape_table['*'])
+ .replace('_', self._escape_table['_']))
+ else:
+ title_str = ''
+ if is_img:
+ result = '<img src="%s" alt="%s"%s%s' \
+ % (url.replace('"', '&quot;'),
+ _xml_escape_attr(link_text),
+ title_str, self.empty_element_suffix)
+ if "smarty-pants" in self.extras:
+ result = result.replace('"', self._escape_table['"'])
+ curr_pos = start_idx + len(result)
+ text = text[:start_idx] + result + text[match.end():]
+ elif start_idx >= anchor_allowed_pos:
+ result_head = '<a href="%s"%s>' % (url, title_str)
+ result = '%s%s</a>' % (result_head, link_text)
+ if "smarty-pants" in self.extras:
+ result = result.replace('"', self._escape_table['"'])
+ # <img> allowed from curr_pos on, <a> from
+ # anchor_allowed_pos on.
+ curr_pos = start_idx + len(result_head)
+ anchor_allowed_pos = start_idx + len(result)
+ text = text[:start_idx] + result + text[match.end():]
+ else:
+ # Anchor not allowed here.
+ curr_pos = start_idx + 1
+ continue
+
+ # Reference anchor or img?
+ else:
+ match = self._tail_of_reference_link_re.match(text, p)
+ if match:
+ # Handle a reference-style anchor or img.
+ is_img = start_idx > 0 and text[start_idx-1] == "!"
+ if is_img:
+ start_idx -= 1
+ link_id = match.group("id").lower()
+ if not link_id:
+ link_id = link_text.lower() # for links like [this][]
+ if link_id in self.urls:
+ url = self.urls[link_id]
+ # We've got to encode these to avoid conflicting
+ # with italics/bold.
+ url = url.replace('*', self._escape_table['*']) \
+ .replace('_', self._escape_table['_'])
+ title = self.titles.get(link_id)
+ if title:
+ title = _xml_escape_attr(title) \
+ .replace('*', self._escape_table['*']) \
+ .replace('_', self._escape_table['_'])
+ title_str = ' title="%s"' % title
+ else:
+ title_str = ''
+ if is_img:
+ result = '<img src="%s" alt="%s"%s%s' \
+ % (url.replace('"', '&quot;'),
+ link_text.replace('"', '&quot;'),
+ title_str, self.empty_element_suffix)
+ if "smarty-pants" in self.extras:
+ result = result.replace('"', self._escape_table['"'])
+ curr_pos = start_idx + len(result)
+ text = text[:start_idx] + result + text[match.end():]
+ elif start_idx >= anchor_allowed_pos:
+ result_head = '<a href="%s"%s>' % (url, title_str)
+ result = '%s%s</a>' % (result_head, link_text)
+ if "smarty-pants" in self.extras:
+ result = result.replace('"', self._escape_table['"'])
+ # <img> allowed from curr_pos on, <a> from
+ # anchor_allowed_pos on.
+ curr_pos = start_idx + len(result_head)
+ anchor_allowed_pos = start_idx + len(result)
+ text = text[:start_idx] + result + text[match.end():]
+ else:
+ # Anchor not allowed here.
+ curr_pos = start_idx + 1
+ else:
+ # This id isn't defined, leave the markup alone.
+ curr_pos = match.end()
+ continue
+
+ # Otherwise, it isn't markup.
+ curr_pos = start_idx + 1
+
+ return text
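+
+ # Rough sketches of the conversions handled above (hedged; outputs
+ # are approximate):
+ #
+ # '[text](/url/ "title")' -> <a href="/url/" title="title">text</a>
+ # '![alt](/img.png)' -> <img src="/img.png" alt="alt" />
+ # '[text][id]' (with '[id]: /url/' defined) -> <a href="/url/">text</a>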
+
+ def header_id_from_text(self, text, prefix, n):
+ """Generate a header id attribute value from the given header
+ HTML content.
+
+ This is only called if the "header-ids" extra is enabled.
+ Subclasses may override this for different header ids.
+
+ @param text {str} The text of the header tag
+ @param prefix {str} The requested prefix for header ids. This is the
+ value of the "header-ids" extra key, if any. Otherwise, None.
+ @param n {int} The <hN> tag number, i.e. `1` for an <h1> tag.
+ @returns {str} The value for the header tag's "id" attribute. Return
+ None to not have an id attribute and to exclude this header from
+ the TOC (if the "toc" extra is specified).
+ """
+ header_id = _slugify(text)
+ if prefix and isinstance(prefix, base_string_type):
+ header_id = prefix + '-' + header_id
+ if header_id in self._count_from_header_id:
+ self._count_from_header_id[header_id] += 1
+ header_id += '-%s' % self._count_from_header_id[header_id]
+ else:
+ self._count_from_header_id[header_id] = 1
+ return header_id
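+
+ # For example (hedged): with extras={"header-ids": "api"}, a header
+ # "## Buffer" gets id "api-buffer"; a second "## Buffer" later in the
+ # same document gets "api-buffer-2" via _count_from_header_id.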
+
+ _toc = None
+ def _toc_add_entry(self, level, id, name):
+ if self._toc is None:
+ self._toc = []
+ self._toc.append((level, id, name))
+
+ _setext_h_re = re.compile(r'^(.+)[ \t]*\n(=+|-+)[ \t]*\n+', re.M)
+ def _setext_h_sub(self, match):
+ n = {"=": 1, "-": 2}[match.group(2)[0]]
+ demote_headers = self.extras.get("demote-headers")
+ if demote_headers:
+ n = min(n + demote_headers, 6)
+ header_id_attr = ""
+ if "header-ids" in self.extras:
+ header_id = self.header_id_from_text(match.group(1),
+ self.extras["header-ids"], n)
+ if header_id:
+ header_id_attr = ' id="%s"' % header_id
+ html = self._run_span_gamut(match.group(1))
+ if "toc" in self.extras and header_id:
+ self._toc_add_entry(n, header_id, html)
+ return "<h%d%s>%s</h%d>\n\n" % (n, header_id_attr, html, n)
+
+ _atx_h_re = re.compile(r'''
+ ^(\#{1,6}) # \1 = string of #'s
+ [ \t]*
+ (.+?) # \2 = Header text
+ [ \t]*
+ (?<!\\) # ensure not an escaped trailing '#'
+ \#* # optional closing #'s (not counted)
+ \n+
+ ''', re.X | re.M)
+ def _atx_h_sub(self, match):
+ n = len(match.group(1))
+ demote_headers = self.extras.get("demote-headers")
+ if demote_headers:
+ n = min(n + demote_headers, 6)
+ header_id_attr = ""
+ if "header-ids" in self.extras:
+ header_id = self.header_id_from_text(match.group(2),
+ self.extras["header-ids"], n)
+ if header_id:
+ header_id_attr = ' id="%s"' % header_id
+ html = self._run_span_gamut(match.group(2))
+ if "toc" in self.extras and header_id:
+ self._toc_add_entry(n, header_id, html)
+ return "<h%d%s>%s</h%d>\n\n" % (n, header_id_attr, html, n)
+
+ def _do_headers(self, text):
+ # Setext-style headers:
+ # Header 1
+ # ========
+ #
+ # Header 2
+ # --------
+ text = self._setext_h_re.sub(self._setext_h_sub, text)
+
+ # atx-style headers:
+ # # Header 1
+ # ## Header 2
+ # ## Header 2 with closing hashes ##
+ # ...
+ # ###### Header 6
+ text = self._atx_h_re.sub(self._atx_h_sub, text)
+
+ return text
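+
+ # Rough examples (hedged; outputs are approximate):
+ #
+ # "Header 1\n========" -> <h1>Header 1</h1>
+ # "## Header 2 ##" -> <h2>Header 2</h2>
+ # With extras={"demote-headers": 1} each lands one level lower
+ # (<h2>, <h3>), capped at <h6>.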
+
+
+ _marker_ul_chars = '*+-'
+ _marker_any = r'(?:[%s]|\d+\.)' % _marker_ul_chars
+ _marker_ul = '(?:[%s])' % _marker_ul_chars
+ _marker_ol = r'(?:\d+\.)'
+
+ def _list_sub(self, match):
+ lst = match.group(1)
+ lst_type = "ul" if match.group(3) in self._marker_ul_chars else "ol"
+ result = self._process_list_items(lst)
+ if self.list_level:
+ return "<%s>\n%s</%s>\n" % (lst_type, result, lst_type)
+ else:
+ return "<%s>\n%s</%s>\n\n" % (lst_type, result, lst_type)
+
+ def _do_lists(self, text):
+ # Form HTML ordered (numbered) and unordered (bulleted) lists.
+
+ # Iterate over each *non-overlapping* list match.
+ pos = 0
+ while True:
+ # Find the *first* hit for either list style (ul or ol). We
+ # match ul and ol separately to avoid adjacent lists of different
+ # types running into each other (see issue #16).
+ hits = []
+ for marker_pat in (self._marker_ul, self._marker_ol):
+ less_than_tab = self.tab_width - 1
+ whole_list = r'''
+ ( # \1 = whole list
+ ( # \2
+ [ ]{0,%d}
+ (%s) # \3 = first list item marker
+ [ \t]+
+ (?!\ *\3\ ) # '- - - ...' isn't a list. See 'not_quite_a_list' test case.
+ )
+ (?:.+?)
+ ( # \4
+ \Z
+ |
+ \n{2,}
+ (?=\S)
+ (?! # Negative lookahead for another list item marker
+ [ \t]*
+ %s[ \t]+
+ )
+ )
+ )
+ ''' % (less_than_tab, marker_pat, marker_pat)
+ if self.list_level: # sub-list
+ list_re = re.compile("^"+whole_list, re.X | re.M | re.S)
+ else:
+ list_re = re.compile(r"(?:(?<=\n\n)|\A\n?)"+whole_list,
+ re.X | re.M | re.S)
+ match = list_re.search(text, pos)
+ if match:
+ hits.append((match.start(), match))
+ if not hits:
+ break
+ hits.sort()
+ match = hits[0][1]
+ start, end = match.span()
+ text = text[:start] + self._list_sub(match) + text[end:]
+ pos = end
+
+ return text
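+
+ # Rough examples (hedged; outputs are approximate):
+ #
+ # "- a\n- b" -> <ul>\n<li>a</li>\n<li>b</li>\n</ul>
+ # "1. a\n2. b" -> <ol>\n<li>a</li>\n<li>b</li>\n</ol>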
+
+ _list_item_re = re.compile(r'''
+ (\n)? # leading line = \1
+ (^[ \t]*) # leading whitespace = \2
+ (?P<marker>%s) [ \t]+ # list marker = \3
+ ((?:.+?) # list item text = \4
+ (\n{1,2})) # eols = \5
+ (?= \n* (\Z | \2 (?P<next_marker>%s) [ \t]+))
+ ''' % (_marker_any, _marker_any),
+ re.M | re.X | re.S)
+
+ _last_li_endswith_two_eols = False
+ def _list_item_sub(self, match):
+ item = match.group(4)
+ leading_line = match.group(1)
+ leading_space = match.group(2)
+ if leading_line or "\n\n" in item or self._last_li_endswith_two_eols:
+ item = self._run_block_gamut(self._outdent(item))
+ else:
+ # Recursion for sub-lists:
+ item = self._do_lists(self._outdent(item))
+ if item.endswith('\n'):
+ item = item[:-1]
+ item = self._run_span_gamut(item)
+ self._last_li_endswith_two_eols = (len(match.group(5)) == 2)
+ return "<li>%s</li>\n" % item
+
+ def _process_list_items(self, list_str):
+ # Process the contents of a single ordered or unordered list,
+ # splitting it into individual list items.
+
+ # `self.list_level` (Markdown.pl's $g_list_level) keeps track of when
+ # we're inside a list.
+ # Each time we enter a list, we increment it; when we leave a list,
+ # we decrement. If it's zero, we're not in a list anymore.
+ #
+ # We do this because when we're not inside a list, we want to treat
+ # something like this:
+ #
+ # I recommend upgrading to version
+ # 8. Oops, now this line is treated
+ # as a sub-list.
+ #
+ # As a single paragraph, despite the fact that the second line starts
+ # with a digit-period-space sequence.
+ #
+ # Whereas when we're inside a list (or sub-list), that line will be
+ # treated as the start of a sub-list. What a kludge, huh? This is
+ # an aspect of Markdown's syntax that's hard to parse perfectly
+ # without resorting to mind-reading. Perhaps the solution is to
+ # change the syntax rules such that sub-lists must start with a
+ # starting cardinal number; e.g. "1." or "a.".
+ self.list_level += 1
+ self._last_li_endswith_two_eols = False
+ list_str = list_str.rstrip('\n') + '\n'
+ list_str = self._list_item_re.sub(self._list_item_sub, list_str)
+ self.list_level -= 1
+ return list_str
+
+ def _get_pygments_lexer(self, lexer_name):
+ try:
+ from pygments import lexers, util
+ except ImportError:
+ return None
+ try:
+ return lexers.get_lexer_by_name(lexer_name)
+ except util.ClassNotFound:
+ return None
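+
+ # Note: None covers both "Pygments isn't installed" and "no lexer
+ # matches that name"; callers fall back to a plain <pre><code>
+ # block in either case.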
+
+ def _color_with_pygments(self, codeblock, lexer, **formatter_opts):
+ import pygments
+ import pygments.formatters
+
+ class HtmlCodeFormatter(pygments.formatters.HtmlFormatter):
+ def _wrap_code(self, inner):
+ """A function for use in a Pygments Formatter which
+ wraps in <code> tags.
+ """
+ yield 0, "<code>"
+ for tup in inner:
+ yield tup
+ yield 0, "</code>"
+
+ def wrap(self, source, outfile):
+ """Return the source with a code, pre, and div."""
+ return self._wrap_div(self._wrap_pre(self._wrap_code(source)))
+
+ formatter_opts.setdefault("cssclass", "codehilite")
+ formatter = HtmlCodeFormatter(**formatter_opts)
+ return pygments.highlight(codeblock, lexer, formatter)
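+
+ # Hedged usage sketch, assuming Pygments is installed:
+ #
+ # lexer = self._get_pygments_lexer("python")
+ # if lexer:
+ # html = self._color_with_pygments("print 42", lexer)
+ # # -> roughly '<div class="codehilite"><pre><code>...</code></pre></div>'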
+
+ def _code_block_sub(self, match, is_fenced_code_block=False):
+ lexer_name = None
+ if is_fenced_code_block:
+ lexer_name = match.group(1)
+ if lexer_name:
+ formatter_opts = self.extras['fenced-code-blocks'] or {}
+ codeblock = match.group(2)
+ codeblock = codeblock[:-1] # drop one trailing newline
+ else:
+ codeblock = match.group(1)
+ codeblock = self._outdent(codeblock)
+ codeblock = self._detab(codeblock)
+ codeblock = codeblock.lstrip('\n') # trim leading newlines
+ codeblock = codeblock.rstrip() # trim trailing whitespace
+
+ # Note: "code-color" extra is DEPRECATED.
+ if "code-color" in self.extras and codeblock.startswith(":::"):
+ lexer_name, rest = codeblock.split('\n', 1)
+ lexer_name = lexer_name[3:].strip()
+ codeblock = rest.lstrip("\n") # Remove lexer declaration line.
+ formatter_opts = self.extras['code-color'] or {}
+
+ if lexer_name:
+ lexer = self._get_pygments_lexer(lexer_name)
+ if lexer:
+ colored = self._color_with_pygments(codeblock, lexer,
+ **formatter_opts)
+ return "\n\n%s\n\n" % colored
+
+ codeblock = self._encode_code(codeblock)
+ pre_class_str = self._html_class_str_from_tag("pre")
+ code_class_str = self._html_class_str_from_tag("code")
+ return "\n\n<pre%s><code%s>%s\n</code></pre>\n\n" % (
+ pre_class_str, code_class_str, codeblock)
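+
+ # Hedged example of the deprecated "code-color" extra: a code block
+ # whose first line is ":::python" has that line stripped and the rest
+ # highlighted with the Python lexer; if no usable lexer is found, it
+ # falls through to the plain <pre><code> path above.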
+
+ def _html_class_str_from_tag(self, tag):
+ """Get the appropriate ' class="..."' string (note the leading
+ space), if any, for the given tag.
+ """
+ if "html-classes" not in self.extras:
+ return ""
+ try:
+ html_classes_from_tag = self.extras["html-classes"]
+ if tag in html_classes_from_tag:
+ return ' class="%s"' % html_classes_from_tag[tag]
+ except TypeError:
+ # The "html-classes" value isn't dict-like (e.g. None).
+ return ""
+ return ""
+
+ def _do_code_blocks(self, text):
+ """Process Markdown `<pre><code>` blocks."""
+ code_block_re = re.compile(r'''
+ (?:\n\n|\A\n?)
+ ( # $1 = the code block -- one or more lines, starting with a space/tab
+ (?:
+ (?:[ ]{%d} | \t) # Lines must start with a tab or a tab-width of spaces
+ .*\n+
+ )+
+ )
+ ((?=^[ ]{0,%d}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
+ ''' % (self.tab_width, self.tab_width),
+ re.M | re.X)
+ return code_block_re.sub(self._code_block_sub, text)
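+
+ # Rough example (hedged): a line indented by one tab-width, e.g.
+ # "    print 42", comes out as roughly
+ # "<pre><code>print 42\n</code></pre>".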
+
+ _fenced_code_block_re = re.compile(r'''
+ (?:\n\n|\A\n?)
+ ^```([\w+-]+)?[ \t]*\n # opening fence, $1 = optional lang
+ (.*?) # $2 = code block content
+ ^```[ \t]*\n # closing fence