Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

initial checkin, pre testing

  • Loading branch information...
commit 7ba29190db53598481ef4fb17258126915b7a695 0 parents
@thruflo authored
Showing with 16,517 additions and 0 deletions.
  1. +3 −0  LICENSE.rst
  2. +97 −0 README.rst
  3. +133 −0 etc/redis.conf
  4. +32 −0 etc/redis.tiger.patch
  5. +40 −0 setup.py
  6. +1 −0  src/torque/__init__.py
  7. +69 −0 src/torque/app.py
  8. +153 −0 src/torque/client.py
  9. +61 −0 src/torque/config.py
  10. +142 −0 src/torque/hooks.py
  11. +48 −0 src/torque/utils.py
  12. +1 −0  vendor/redis-git
  13. +4 −0 vendor/tornado-git/.gitignore
  14. +2 −0  vendor/tornado-git/MANIFEST.in
  15. +27 −0 vendor/tornado-git/README
  16. +48 −0 vendor/tornado-git/demos/appengine/README
  17. +11 −0 vendor/tornado-git/demos/appengine/app.yaml
  18. +169 −0 vendor/tornado-git/demos/appengine/blog.py
  19. +1,877 −0 vendor/tornado-git/demos/appengine/markdown.py
  20. +153 −0 vendor/tornado-git/demos/appengine/static/blog.css
  21. +31 −0 vendor/tornado-git/demos/appengine/templates/archive.html
  22. +29 −0 vendor/tornado-git/demos/appengine/templates/base.html
  23. +42 −0 vendor/tornado-git/demos/appengine/templates/compose.html
  24. +5 −0 vendor/tornado-git/demos/appengine/templates/entry.html
  25. +26 −0 vendor/tornado-git/demos/appengine/templates/feed.xml
  26. +8 −0 vendor/tornado-git/demos/appengine/templates/home.html
  27. +8 −0 vendor/tornado-git/demos/appengine/templates/modules/entry.html
  28. +79 −0 vendor/tornado-git/demos/auth/authdemo.py
  29. +57 −0 vendor/tornado-git/demos/blog/README
  30. +195 −0 vendor/tornado-git/demos/blog/blog.py
  31. +1,877 −0 vendor/tornado-git/demos/blog/markdown.py
  32. +44 −0 vendor/tornado-git/demos/blog/schema.sql
  33. +153 −0 vendor/tornado-git/demos/blog/static/blog.css
  34. +31 −0 vendor/tornado-git/demos/blog/templates/archive.html
  35. +27 −0 vendor/tornado-git/demos/blog/templates/base.html
  36. +42 −0 vendor/tornado-git/demos/blog/templates/compose.html
  37. +5 −0 vendor/tornado-git/demos/blog/templates/entry.html
  38. +26 −0 vendor/tornado-git/demos/blog/templates/feed.xml
  39. +8 −0 vendor/tornado-git/demos/blog/templates/home.html
  40. +8 −0 vendor/tornado-git/demos/blog/templates/modules/entry.html
  41. +156 −0 vendor/tornado-git/demos/chat/chatdemo.py
  42. +56 −0 vendor/tornado-git/demos/chat/static/chat.css
  43. +135 −0 vendor/tornado-git/demos/chat/static/chat.js
  44. +37 −0 vendor/tornado-git/demos/chat/templates/index.html
  45. +1 −0  vendor/tornado-git/demos/chat/templates/message.html
  46. +8 −0 vendor/tornado-git/demos/facebook/README
  47. +127 −0 vendor/tornado-git/demos/facebook/facebook.py
  48. +97 −0 vendor/tornado-git/demos/facebook/static/facebook.css
  49. 0  vendor/tornado-git/demos/facebook/static/facebook.js
  50. +29 −0 vendor/tornado-git/demos/facebook/templates/modules/post.html
  51. +22 −0 vendor/tornado-git/demos/facebook/templates/stream.html
  52. +22 −0 vendor/tornado-git/demos/facebook/uimodules.py
  53. +43 −0 vendor/tornado-git/demos/helloworld/helloworld.py
  54. +38 −0 vendor/tornado-git/setup.py
  55. +17 −0 vendor/tornado-git/tornado/__init__.py
  56. +882 −0 vendor/tornado-git/tornado/auth.py
  57. +64 −0 vendor/tornado-git/tornado/autoreload.py
  58. +179 −0 vendor/tornado-git/tornado/database.py
  59. +112 −0 vendor/tornado-git/tornado/epoll.c
  60. +112 −0 vendor/tornado-git/tornado/escape.py
  61. +428 −0 vendor/tornado-git/tornado/httpclient.py
  62. +434 −0 vendor/tornado-git/tornado/httpserver.py
  63. +370 −0 vendor/tornado-git/tornado/ioloop.py
  64. +228 −0 vendor/tornado-git/tornado/iostream.py
  65. +395 −0 vendor/tornado-git/tornado/locale.py
  66. +350 −0 vendor/tornado-git/tornado/options.py
  67. +255 −0 vendor/tornado-git/tornado/s3server.py
  68. +561 −0 vendor/tornado-git/tornado/template.py
  69. +1,282 −0 vendor/tornado-git/tornado/web.py
  70. +298 −0 vendor/tornado-git/tornado/wsgi.py
  71. +15 −0 vendor/tornado-git/website/app.yaml
  72. 0  vendor/tornado-git/website/index.yaml
  73. +603 −0 vendor/tornado-git/website/markdown/__init__.py
  74. +95 −0 vendor/tornado-git/website/markdown/blockparser.py
  75. +460 −0 vendor/tornado-git/website/markdown/blockprocessors.py
  76. +96 −0 vendor/tornado-git/website/markdown/commandline.py
  77. +33 −0 vendor/tornado-git/website/markdown/etree_loader.py
  78. 0  vendor/tornado-git/website/markdown/extensions/__init__.py
  79. +140 −0 vendor/tornado-git/website/markdown/extensions/toc.py
  80. +274 −0 vendor/tornado-git/website/markdown/html4.py
  81. +371 −0 vendor/tornado-git/website/markdown/inlinepatterns.py
  82. +162 −0 vendor/tornado-git/website/markdown/odict.py
  83. +77 −0 vendor/tornado-git/website/markdown/postprocessors.py
  84. +214 −0 vendor/tornado-git/website/markdown/preprocessors.py
  85. +329 −0 vendor/tornado-git/website/markdown/treeprocessors.py
  86. +120 −0 vendor/tornado-git/website/static/base.css
  87. BIN  vendor/tornado-git/website/static/facebook.png
  88. BIN  vendor/tornado-git/website/static/friendfeed.png
  89. +2 −0  vendor/tornado-git/website/static/robots.txt
  90. BIN  vendor/tornado-git/website/static/tornado-0.1.tar.gz
  91. BIN  vendor/tornado-git/website/static/tornado-0.2.tar.gz
  92. BIN  vendor/tornado-git/website/static/tornado.png
  93. BIN  vendor/tornado-git/website/static/twitter.png
  94. +27 −0 vendor/tornado-git/website/templates/base.html
  95. +9 −0 vendor/tornado-git/website/templates/documentation.html
  96. +866 −0 vendor/tornado-git/website/templates/documentation.txt
  97. +51 −0 vendor/tornado-git/website/templates/index.html
  98. +63 −0 vendor/tornado-git/website/website.py
3  LICENSE.rst
@@ -0,0 +1,3 @@
+`Creative Commons CC0 1.0 Universal <http://creativecommons.org/publicdomain/zero/1.0/>`_.
+
+To the extent possible under law, `James Arthur <http://thruflo.com>`_ has waived all copyright and related or neighboring rights to `Torque <http://github.com/thruflo/torque>`_. This work is published from United Kingdom.
97 README.rst
@@ -0,0 +1,97 @@
+
+Overview
+--------
+
+"""Run the taskqueue::
+
+ $ ./bin/run-taskqueue
+
+ This will expose a Tornado webserver (by default running on port 8090,
+ use ``-p`` to specify another port, e.g.: ``-p 8081``).
+
+ To add a task to the queue, post to ``/hooks/add`` with two params:
+
+ * ``url`` which is the url to the webhook you want the task to request
+ * ``params`` which is a json encoded dictionary of the params you want
+ to post to the webhook you're requesting
+
+ An example in python might be::
+
+ try:
+ import json
+ except ImportError:
+ import simplejson as json
+ import urllib
+
+ mytask = {
+ 'url': 'http://mywebservice.com/hooks/do/foo',
+        'params': json.dumps({'foo': 'somevalue', 'baz': 99})
+ }
+ target_url = 'http://localhost:8090/hooks/add'
+ urllib.urlopen(target_url, urllib.urlencode(mytask))
+
+ This queued a POST request to ``http://mywebservice.com/hooks/do/foo`` with
+ the params ``foo=somevalue`` and ``baz=99`` to be made as soon as possible
+ and then returned immediately.
+
+ You can do something similar using any programming language that can make
+ url requests. However, if you are using python, you can use the client api
+ that torque provides::
+
+ from torque import client
+
+ # create a task
+ t = client.Task(url='http://mywebservice.com/hooks/do/foo', params={'a': 1})
+ # add it to the queue
+ t.add()
+
+ # or just use the shortcut function to do
+ # both at the same time
+ client.add(url='http://mywebservice.com/hooks/do/foo', params={'a': 1})
+
+ You can also specify a base url for all task requests using ``--base-task-url``
+ e.g. ``--base-task-url 'http://mywebservice.com'`` allows::
+
+ t = client.Task(url='/hooks/do/foo', params={'a': 1})
+
+ You can specify a delay for the task, so that it's executed *after* (but
+ not necessarily *at*) a number of seconds::
+
+ t = client.Task(url='/hooks/do/foo', params={'a': 1}, delay=2)
+
+ Individual tasks backoff exponentially if they error, until they error
+  either ``MAX_TASK_ERRORS`` or ``--max-task-errors`` times, at which point
+ they get binned.
+"""
+
+
+
+
+Install
+-------
+
+Install the ``./vendor`` dependencies. (See ``./etc/redis.tiger.patch`` if, like me, you're still using OSX Tiger.)
+
+Then install the egg::
+
+ $ python setup.py install
+
+
+Run
+---
+
+Run redis::
+
+ $ ...
+
+Start the task queue::
+
+ $ ./bin/run-taskqueue
+
+
+Use
+---
+
+...
+
+
133 etc/redis.conf
@@ -0,0 +1,133 @@
+# Redis configuration file example
+
+# By default Redis does not run as a daemon. Use 'yes' if you need it.
+# Note that Redis will write a pid file in /var/run/redis.pid when daemonized.
+daemonize no
+
+# When run as a daemon, Redis write a pid file in /var/run/redis.pid by default.
+# You can specify a custom pid file location here.
+pidfile /var/run/redis.pid
+
+# Accept connections on the specified port, default is 6379
+port 6379
+
+# If you want you can bind a single interface, if the bind option is not
+# specified all the interfaces will listen for connections.
+#
+# bind 127.0.0.1
+
+# Close the connection after a client is idle for N seconds (0 to disable)
+timeout 300
+
+# Save the DB on disk:
+#
+# save <seconds> <changes>
+#
+# Will save the DB if both the given number of seconds and the given
+# number of write operations against the DB occurred.
+#
+# In the example below the behaviour will be to save:
+# after 900 sec (15 min) if at least 1 key changed
+# after 300 sec (5 min) if at least 10 keys changed
+# after 60 sec if at least 10000 keys changed
+##save 900 1
+##save 300 10
+##save 60 10000
+save 1 1
+
+# The filename where to dump the DB
+dbfilename dump.rdb
+
+# For default save/load DB in/from the working directory
+# Note that you must specify a directory not a file name.
+dir ./
+
+# Set server verbosity to 'debug'
+# it can be one of:
+# debug (a lot of information, useful for development/testing)
+# notice (moderately verbose, what you want in production probably)
+# warning (only very important / critical messages are logged)
+loglevel debug
+
+# Specify the log file name. Also 'stdout' can be used to force
+# the demon to log on the standard output. Note that if you use standard
+# output for logging but daemonize, logs will be sent to /dev/null
+logfile stdout
+
+# Set the number of databases. The default database is DB 0, you can select
+# a different one on a per-connection basis using SELECT <dbid> where
+# dbid is a number between 0 and 'databases'-1
+databases 16
+
+################################# REPLICATION #################################
+
+# Master-Slave replication. Use slaveof to make a Redis instance a copy of
+# another Redis server. Note that the configuration is local to the slave
+# so for example it is possible to configure the slave to save the DB with a
+# different interval, or to listen to another port, and so on.
+
+# slaveof <masterip> <masterport>
+
+################################## SECURITY ###################################
+
+# Require clients to issue AUTH <PASSWORD> before processing any other
+# commands. This might be useful in environments in which you do not trust
+# others with access to the host running redis-server.
+#
+# This should stay commented out for backward compatibility and because most
+# people do not need auth (e.g. they run their own servers).
+
+# requirepass foobared
+
+################################### LIMITS ####################################
+
+# Set the max number of connected clients at the same time. By default there
+# is no limit, and it's up to the number of file descriptors the Redis process
+# is able to open. The special value '0' means no limits.
+# Once the limit is reached Redis will close all the new connections sending
+# an error 'max number of clients reached'.
+
+# maxclients 128
+
+# Don't use more memory than the specified amount of bytes.
+# When the memory limit is reached Redis will try to remove keys with an
+# EXPIRE set. It will try to start freeing keys that are going to expire
+# in little time and preserve keys with a longer time to live.
+# Redis will also try to remove objects from free lists if possible.
+#
+# If all this fails, Redis will start to reply with errors to commands
+# that will use more memory, like SET, LPUSH, and so on, and will continue
+# to reply to most read-only commands like GET.
+#
+# WARNING: maxmemory can be a good idea mainly if you want to use Redis as a
+# 'state' server or cache, not as a real DB. When Redis is used as a real
+# database the memory usage will grow over the weeks, it will be obvious if
+# it is going to use too much memory in the long run, and you'll have the time
+# to upgrade. With maxmemory after the limit is reached you'll start to get
+# errors for write operations, and this may even lead to DB inconsistency.
+
+# maxmemory <bytes>
+
+############################### ADVANCED CONFIG ###############################
+
+# Glue small output buffers together in order to send small replies in a
+# single TCP packet. Uses a bit more CPU but most of the times it is a win
+# in terms of number of queries per second. Use 'yes' if unsure.
+glueoutputbuf yes
+
+# Use object sharing. Can save a lot of memory if you have many common
+# string in your dataset, but performs lookups against the shared objects
+# pool so it uses more CPU and can be a bit slower. Usually it's a good
+# idea.
+#
+# When object sharing is enabled (shareobjects yes) you can use
+# shareobjectspoolsize to control the size of the pool used in order to try
+# object sharing. A bigger pool size will lead to better sharing capabilities.
+# In general you want this value to be at least the double of the number of
+# very common strings you have in your dataset.
+#
+# WARNING: object sharing is experimental, don't enable this feature
+# in production before of Redis 1.0-stable. Still please try this feature in
+# your development environment so that we can test it better.
+shareobjects no
+shareobjectspoolsize 1024
32 etc/redis.tiger.patch
@@ -0,0 +1,32 @@
+diff -urN redis-1.02/Makefile redis-1.02.new/Makefile
+--- redis-1.02/Makefile 2009-09-03 04:43:43.000000000 -0400
++++ redis-1.02.new/Makefile 2009-11-15 03:07:56.000000000 -0500
+@@ -2,7 +2,7 @@
+ # Copyright (C) 2009 Salvatore Sanfilippo <antirez at gmail dot com>
+ # This file is released under the BSD license, see the COPYING file
+
+-DEBUG?= -g -rdynamic -ggdb
++DEBUG?= -g -ggdb
+ CFLAGS?= -std=c99 -pedantic -O2 -Wall -W
+ CCOPT= $(CFLAGS)
+
+diff -urN redis-1.02/config.h redis-1.02.new/config.h
+--- redis-1.02/config.h 2009-09-03 04:43:43.000000000 -0400
++++ redis-1.02.new/config.h 2009-11-15 03:08:35.000000000 -0500
+@@ -13,7 +13,7 @@
+ #endif
+
+ /* define redis_fstat to fstat or fstat64() */
+-#if defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6)
++#if defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_5)
+ #define redis_fstat fstat64
+ #define redis_stat stat64
+ #else
+@@ -22,7 +22,7 @@
+ #endif
+
+ /* test for backtrace() */
+-#if defined(__APPLE__) || defined(__linux__)
++#if defined(MAC_OS_X_VERSION_10_5) || defined(__linux__)
+ #define HAVE_BACKTRACE 1
+ #endif
40 setup.py
@@ -0,0 +1,40 @@
from setuptools import setup, find_packages

# packaging metadata for the ``torque`` egg
setup(
    name = 'torque',
    version = '0.3',
    description = 'Tornado based web hook task queue',
    long_description = open('README.rst').read(),
    author = 'James Arthur',
    author_email = 'thruflo@googlemail.com',
    url = 'http://github.com/thruflo/torque',
    classifiers = [
        'Development Status :: 3 - Alpha',
        'Environment :: Web Environment',
        'Intended Audience :: Developers',
        'License :: Public Domain',
        'Programming Language :: Python'
    ],
    license = open('LICENSE.rst').read(),
    packages = find_packages('src'),
    package_dir = {'': 'src'},
    include_package_data = True,
    zip_safe = False,
    install_requires=[
        'setuptools_git==0.3.4',
        'pycurl==7.18.1',
        'simplejson==2.0.9',
        'tornado==0.2',
        'redis==0.6.1',
        'nose==0.11.1'
    ],
    test_suite = 'nose.collector',
    entry_points = {
        'setuptools.file_finders': [
            'findfiles = setuptools_git:gitlsfiles'
        ],
        'console_scripts': [
            # the server ``main`` lives in ``torque.app``; this checkin
            # contains no ``torque.taskqueue`` module, so the original
            # ``torque.taskqueue:main`` entry point could never resolve
            'run-taskqueue = torque.app:main'
        ]
    }
)
1  src/torque/__init__.py
@@ -0,0 +1 @@
+# makes this folder a python package
69 src/torque/app.py
@@ -0,0 +1,69 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""Sets up the Tornado web application and provides a ``main`` function
+ which starts it up and starts the taskqueue polling.
+"""
+
+import logging
+import time
+
+from tornado import ioloop, httpserver, web
+
+from config import options
+from hooks import AddTask, ConcurrentExecuter
+from utils import dispatch_request
+
# map the two webhook urls onto their handlers; ``debug`` comes from
# the ``--debug`` command line option
application = web.Application([(
        r'/hooks/add',
        AddTask,
    ), (
        r'/hooks/execute',
        ConcurrentExecuter
    )
    ],
    debug=options.debug
)
+
def loop():
    """Poll ``/hooks/execute`` forever, sleeping between requests.

    The sleep backs off exponentially while the queue is empty or
    erroring, and shrinks back towards ``--min-delay`` while tasks
    are being processed successfully.
    """
    backoff = options.min_delay
    while True:
        status = dispatch_request(
            url='http://localhost:%s/hooks/execute' % options.port,
            params={
                'queue_name': options.queue_name,
                'limit': options.max_concurrent_tasks
            }
        )
        if status == 200:
            # tasks were executed: wind the backoff back down,
            # clamping at the configured minimum
            if backoff > options.min_delay:
                backoff = backoff / options.error_multiplier
                if backoff < options.min_delay:
                    backoff = options.min_delay
        elif status == 204: # there were no tasks to execute
            backoff = backoff * options.empty_multiplier
            if backoff > options.max_empty_delay:
                backoff = options.max_empty_delay
        else: # there was an unexpected error
            backoff = backoff * options.error_multiplier
            if backoff > options.max_error_delay:
                backoff = options.max_error_delay
        time.sleep(backoff)
+
+
+
def main():
    """Start the http server on ``--port`` and begin polling the queue."""
    # set the logging level
    logging.getLogger().setLevel(getattr(logging, options.log_level.upper()))
    # start the http server on the configured port -- it was hard-coded
    # to 8888, which disagreed with the documented default of 8090
    http_server = httpserver.HTTPServer(application)
    http_server.listen(options.port)
    # ``IOLoop.start()`` blocks, so in the original the ``loop()`` call
    # placed after it could never run; poll from a daemon thread instead
    import threading
    poller = threading.Thread(target=loop)
    poller.setDaemon(True)
    poller.start()
    # start the async io loop (blocks until stopped)
    ioloop.IOLoop.instance().start()


if __name__ == "__main__":
    main()
+
153 src/torque/client.py
@@ -0,0 +1,153 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""Provides helper methods for adding tasks to and fetching
+ tasks from a redis backed queue.
+
+ Trivial thanks to http://code.google.com/p/redis/wiki/SortedSets
+"""
+
+import time
+
+import redis as redis
+r = redis.Redis()
+
+try:
+ import json
+except ImportError:
+ import simplejson as json
+
+from config import options
+from utils import normalise_url
+
class Task(object):
    """A task consists of a ``url`` to post some ``params`` to::

        >>> task = Task(url='/hooks/foo', params={'a': 1})

    Add it to the queue::

        >>> task.add()

    You can schedule the task to be executed *after* a number of
    seconds in the future::

        >>> task.add(delay=2)

    And you can specify which queue to stick it in::

        >>> task.add(queue_name='foo')

    """

    def __init__(self, url, params=None, queue_name=options.queue_name):
        # ``params=None`` instead of the original mutable ``{}`` default,
        # which would be shared by every Task created without params
        self.doc = {
            'url': normalise_url(url),
            'params': params or {}
        }
        self.queue_name = queue_name

    def add(self, queue_name=None, delay=0):
        """Adds a task to the queue.

        See http://code.google.com/p/redis/wiki/ZaddCommand

        @@ because this is a sorted set, if the task is a
        duplicate, it has its timestamp updated. This may
        or may not be quite what we want task-delay wise
        but it certainly helps minimise processing.
        """

        task_string = json.dumps(self.doc)
        # the score is the (unix) time the task becomes due
        ts = time.time() + delay
        queue_name = queue_name and queue_name or self.queue_name
        return r.zadd(queue_name, task_string, ts)

    def remove(self, queue_name=None):
        """Removes the task from the queue.

        See http://code.google.com/p/redis/wiki/ZremCommand
        """

        task_string = json.dumps(self.doc)
        queue_name = queue_name and queue_name or self.queue_name
        return r.zrem(queue_name, task_string)

    def __repr__(self):
        return u'<torque.client.Task queue=%s, url=%s, params=%s>' % (
            self.queue_name,
            self.doc['url'],
            self.doc['params']
        )
+
+
+
def _ensure_task_string(what):
    """Coerce ``what`` -- a ``Task``, a dict or an already-encoded
    string -- into its JSON string form.
    """
    if isinstance(what, Task):
        return json.dumps(what.doc)
    if isinstance(what, dict):
        return json.dumps(what)
    # already a (JSON) string: pass through untouched
    return what
+
+
+
def add(url, params, delay=0, queue_name=options.queue_name):
    """Shortcut: create a ``Task`` and ``add()`` it in one call."""
    t = Task(url=url, params=params, queue_name=queue_name)
    return t.add(delay=delay)

def update(task, delay=0, queue_name=options.queue_name):
    """Re-schedule ``task`` (``Task``, dict or string) to run
    ``delay`` seconds from now.
    """
    task_string = _ensure_task_string(task)
    ts = time.time() + delay
    return r.zadd(queue_name, task_string, ts)

def remove(task_string, queue_name=options.queue_name):
    """Remove ``task_string`` from the queue.

    Fixed: the body referenced an undefined name ``task`` (a
    NameError on every call); the parameter is ``task_string``.
    """
    task_string = _ensure_task_string(task_string)
    return r.zrem(queue_name, task_string)
+
+
def fetch(
    ts=None,
    delay=0,
    decode=True,
    limit=options.max_concurrent_tasks,
    queue_name=options.queue_name
    ):
    """Gets upto ``limit`` tasks from the queue, in timestamp order.

    If ``decode`` is true, returns the tasks as dicts.

    See http://code.google.com/p/redis/wiki/ZrangebyscoreCommand
    """

    # default the cut-off timestamp to now (plus the optional delay),
    # i.e. only tasks that are already due are returned
    if ts is None and delay == 0:
        ts = time.time()
    elif ts is None:
        ts = time.time() + delay
    # 'inf' tells redis there is no upper bound on the result count
    if limit is None:
        limit = 'inf'
    # NOTE(review): raw command string -- presumably the redis-py
    # version pinned in setup.py (0.6.1) has no zrangebyscore helper;
    # confirm before upgrading the client library
    results = r.send_command(
        'ZRANGEBYSCORE %s 0 %s LIMIT 0 %s\r\n' % (
            queue_name,
            ts,
            limit
        )
    )
    if not decode:
        return results
    return [json.loads(item) for item in results]
+
+
def get_and_increment_error_count(task_string):
    """Increment and return the number of times ``task_string`` has
    errored.  The counter expires after 48 hours.
    """
    task_string = _ensure_task_string(task_string)
    # interpolate the task into the key: the original left the bare
    # ``u'%s_error_count'`` template, so every task shared one counter
    error_key = u'%s_error_count' % task_string
    if r.exists(error_key):
        error_count = int(r.get(error_key))
    else:
        error_count = 0
    error_count += 1
    r.set(error_key, str(error_count))
    r.expire(error_key, 172800)  # 48 hours in seconds
    return error_count
+
+
61 src/torque/config.py
@@ -0,0 +1,61 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""Parse command line options into config.*
+"""
+
+import logging
+
+from optparse import OptionParser
+
# setup the option parser; numeric options declare a ``type`` so that
# values given on the command line arrive as numbers, not strings
# (the backoff arithmetic in ``app.loop`` breaks on string values)
parser = OptionParser()
parser.add_option(
    '--logging', dest='log_level', default='INFO',
    help='logging level'
)
parser.add_option(
    # ``store_true``: previously the raw value string was stored, so
    # even ``--debug false`` counted as true
    '--debug', dest='debug', action='store_true', default=False,
    help='debug mode'
)
parser.add_option(
    '--port', dest='port', type='int', default=8090,
    help='which port to run on'
)
parser.add_option(
    '--queue-name', dest='queue_name', default='default_taskqueue',
    help='name of the queue - useful if you want to run more than one'
)
parser.add_option(
    '--base-task-url', dest='base_task_url', default='http://localhost:8080',
    help='base url to use if and when expanding relative task urls'
)
parser.add_option(
    '--max-concurrent-tasks', dest='max_concurrent_tasks', type='int', default=5,
    help='how many tasks can be processed concurrently?'
)
parser.add_option(
    '--max-task-errors', dest='max_task_errors', type='int', default=8,
    help='how many times can a task error?'
)
parser.add_option(
    '--min-delay', dest='min_delay', type='float', default=0.2,
    help='how long to wait between polling when there are tasks pending'
)
parser.add_option(
    '--max-empty-delay', dest='max_empty_delay', type='float', default=1.6,
    help='how long to wait between polling when there are no tasks pending'
)
parser.add_option(
    '--max-error-delay', dest='max_error_delay', type='float', default=240,
    help='how long to wait between polling when the concurrent executer is erroring'
)
parser.add_option(
    '--empty-multiplier', dest='empty_multiplier', type='float', default=2.0,
    help='what to multiply the delay by when empty'
)
parser.add_option(
    '--error-multiplier', dest='error_multiplier', type='float', default=4.0,
    help='what to multiply the delay by when erroring'
)
# parsed at import time; ``options`` is what the rest of the package uses
(options, args) = parser.parse_args()
142 src/torque/hooks.py
@@ -0,0 +1,142 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
import logging
import time

try:
    import json
except ImportError:
    import simplejson as json

from tornado import httpclient, web

from client import add, update, remove, fetch, get_and_increment_error_count
from config import options
from utils import unicode_urlencode
+
class ConcurrentExecuter(web.RequestHandler):
    """Takes a ``queue_name``, fetches ``limit`` items from
    the queue, and posts them individually via concurrent,
    non-blocking requests.

    If the queue is empty, returns 204 to indicate there's
    no content to process.

    If an individual task errors, its ``ts`` is incremented
    according to a backoff algorithm.
    """

    def get(self):
        self.post()

    @web.asynchronous
    def post(self):
        logging.info('ConcurrentExecuter.post')
        # queue_name and limit are optional
        kwargs = {}
        queue_name = self.get_argument('queue_name', False)
        if queue_name:
            kwargs['queue_name'] = queue_name
        limit = self.get_argument('limit', False)
        if limit:
            # request arguments arrive as strings; the redis LIMIT
            # clause needs a number
            kwargs['limit'] = int(limit)
        # keyword arguments must precede **kwargs in python 2 --
        # the original ``fetch(**kwargs, decode=False)`` was a
        # SyntaxError that stopped the module importing at all
        tasks = fetch(decode=False, **kwargs)
        if len(tasks) == 0:
            logging.info('no tasks left')
            self.set_status(204)
            self.finish()
        else:
            logging.info('picked up %s tasks' % len(tasks))
            self.kwargs = queue_name and {'queue_name': queue_name} or {}
            self.task_strings = []
            http = httpclient.AsyncHTTPClient()
            for task_string in tasks:
                task = json.loads(task_string)
                url = task['url']
                params = task['params']
                logging.info('httpclient.AsyncHTTPClient.fetch %s' % task)
                http.fetch(
                    url,
                    method='POST',
                    body=unicode_urlencode(params),
                    callback=self.async_callback(
                        self._handle_response,
                        task_string=task_string
                    )
                )
                self.task_strings.append(task_string)

    def _handle_response(self, response, task_string):
        """Remove the task on success; on failure, back it off
        exponentially until it has errored ``--max-task-errors``
        times, then bin it.  Finishes the request with 200 once
        every concurrent fetch has called back.
        """
        logging.info('ConcurrentExecuter._handle_response for %s' % task_string)
        if not response.error:
            # delete the task from the queue
            remove(task_string, **self.kwargs)
            logging.info('deleted %s' % task_string)
        else:
            logging.info(response.error)
            # ``MAX_TASK_ERRORS`` was an undefined name: the limit is
            # the ``--max-task-errors`` option; ``get_and_increment_error_count``
            # comes from ``client`` (see the import at the top of the file)
            error_count = get_and_increment_error_count(task_string)
            if error_count < int(options.max_task_errors):
                # backoff scheduling it again
                delay = options.min_delay
                while error_count > 0:
                    delay = delay * options.error_multiplier
                    error_count -= 1
                update(task_string, delay=delay, **self.kwargs)
                logging.info('backed %s off for %s secs' % (task_string, delay))
            else: # delete it
                remove(task_string, **self.kwargs)
                logging.info('deleted %s' % task_string)
        # if all the requests have returned
        logging.info('removing %s from...' % task_string)
        logging.info(self.task_strings)
        self.task_strings.remove(task_string)
        logging.info(self.task_strings)
        if len(self.task_strings) == 0:
            # finish the request, returning a status of 200
            self.set_status(200)
            self.finish()
            logging.info('finished')
+
+
+
+
class AddTask(web.RequestHandler):
    """Webhook available on ``/hooks/add`` that allows tasks to
    be added to the queue over an http request, i.e.: from any
    programming language.

    To add a task to the queue, post to ``/hooks/add`` with two params:

    * ``url`` which is the url to the webhook you want the task to request
    * ``params`` which is a json encoded dictionary of the params you want
      to post to the webhook you're requesting

    You can also provide a ``delay`` and a ``queue_name``.
    """

    def get(self):
        self.post()

    # NB: not ``@web.asynchronous`` -- ``add`` is a synchronous redis
    # call and the original never called ``self.finish()``, so every
    # request to this hook hung forever; let tornado finish normally
    def post(self):
        # url is required
        url = self.get_argument('url')
        # params are passed in empty if not provided
        kwargs = {
            'params': json.loads(self.get_argument('params', '{}')),
        }
        # queue_name and delay are optional
        queue_name = self.get_argument('queue_name', False)
        if queue_name:
            kwargs['queue_name'] = queue_name
        delay = self.get_argument('delay', False)
        if delay:
            # request arguments arrive as strings; the delay is used
            # in timestamp arithmetic, so coerce it
            kwargs['delay'] = float(delay)
        add(url, **kwargs)
+
+
48 src/torque/utils.py
@@ -0,0 +1,48 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""Misc helper functions.
+"""
+
+import re
+starts_with_some_text_a_colon_and_two_fwd_slashes = re.compile(r'^[a-zA-Z]+:\/\/')
+
+import urllib, urllib2
+
+from config import options
+
def normalise_url(url):
    """Expand a relative ``url`` against the ``--base-task-url``
    option; absolute urls (anything starting ``scheme://``) pass
    through untouched.
    """
    if starts_with_some_text_a_colon_and_two_fwd_slashes.match(url):
        return url
    # prefix a '/' if the relative url doesn't already have one
    return u'%s%s' % (
        options.base_task_url,
        url.startswith('/') and url or unicode(u'/' + url)
    )
+
+
def unicode_urlencode(params):
    """Urlencode ``params`` (a dict or a sequence of pairs),
    utf-8 encoding any unicode values first.
    """
    if isinstance(params, dict):
        params = params.items()
    encoded = []
    for key, value in params:
        if isinstance(value, unicode):
            value = value.encode('utf-8')
        encoded.append((key, value))
    return urllib.urlencode(encoded)
+
+
+def dispatch_request(url, params={}):
+ url = normalise_url(url)
+ postdata = unicode_urlencode(params)
+ request = urllib2.Request(url, postdata)
+ try:
+ response = urllib2.urlopen(request)
+ except Exception, err:
+ logging.warning('error dispatching request')
+ logging.warning(request)
+ logging.warning(err)
+ return 500
+ else:
+ return response.status
+
+
1  vendor/redis-git
@@ -0,0 +1 @@
+Subproject commit 322fc7d855d3c19f8ca95cf5d66f8111f7981a30
4 vendor/tornado-git/.gitignore
@@ -0,0 +1,4 @@
+*.pyc
+*.so
+*~
+build
2  vendor/tornado-git/MANIFEST.in
@@ -0,0 +1,2 @@
+recursive-include demos *.py *.yaml *.html *.css *.png *.js *.xml *.sql README
+include tornado/epoll.c
27 vendor/tornado-git/README
@@ -0,0 +1,27 @@
+Tornado
+=======
+Tornado is an open source version of the scalable, non-blocking web server
+and and tools that power FriendFeed. Documentation and downloads are
+available at http://www.tornadoweb.org/
+
+Tornado is licensed under the Apache Licence, Version 2.0
+(http://www.apache.org/licenses/LICENSE-2.0.html).
+
+Installation
+============
+To install:
+
+ python setup.py build
+ sudo python setup.py install
+
+Tornado has been tested on Python 2.5 and 2.6. To use all of the features
+of Tornado, you need to have PycURL and a JSON library like simplejson
+installed.
+
+On Mac OS X, you can install the packages with:
+
+ sudo easy_install setuptools pycurl==7.16.2.1 simplejson
+
+On Ubuntu Linux, you can install the packages with:
+
+ sudo apt-get install python-pycurl python-simplejson
48 vendor/tornado-git/demos/appengine/README
@@ -0,0 +1,48 @@
+Running the Tornado AppEngine example
+=====================================
+This example is designed to run in Google AppEngine, so there are a couple
+of steps to get it running. You can download the Google AppEngine Python
+development environment at http://code.google.com/appengine/downloads.html.
+
+1. Link or copy the tornado code directory into this directory:
+
+ ln -s ../../tornado tornado
+
+ AppEngine doesn't use the Python modules installed on this machine.
+ You need to have the 'tornado' module copied or linked for AppEngine
+ to find it.
+
+3. Install and run dev_appserver
+
+ If you don't already have the App Engine SDK, download it from
+ http://code.google.com/appengine/downloads.html
+
+ To start the tornado demo, run the dev server on this directory:
+
+ dev_appserver.py .
+
+4. Visit http://localhost:8080/ in your browser
+
+ If you sign in as an administrator, you will be able to create and
+ edit blog posts. If you sign in as anybody else, you will only see
+ the existing blog posts.
+
+
+If you want to deploy the blog in production:
+
+1. Register a new appengine application and put its id in app.yaml
+
+ First register a new application at http://appengine.google.com/.
+ Then edit app.yaml in this directory and change the "application"
+   setting from "tornado-appengine" to your new application id.
+
+2. Deploy to App Engine
+
+ If you registered an application id, you can now upload your new
+ Tornado blog by running this command:
+
+ appcfg update .
+
+ After that, visit application_id.appspot.com, where application_id
+ is the application you registered.
+
11 vendor/tornado-git/demos/appengine/app.yaml
@@ -0,0 +1,11 @@
+application: tornado-appengine
+version: 1
+runtime: python
+api_version: 1
+
+handlers:
+- url: /static/
+ static_dir: static
+
+- url: /.*
+ script: blog.py
169 vendor/tornado-git/demos/appengine/blog.py
@@ -0,0 +1,169 @@
+#!/usr/bin/env python
+#
+# Copyright 2009 Facebook
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import functools
+import markdown
+import os.path
+import re
+import tornado.web
+import tornado.wsgi
+import unicodedata
+import wsgiref.handlers
+
+from google.appengine.api import users
+from google.appengine.ext import db
+
+
class Entry(db.Model):
    """A single blog entry."""
    author = db.UserProperty()  # Google account that created the post
    title = db.StringProperty(required=True)
    slug = db.StringProperty(required=True)  # URL fragment used by /entry/<slug>
    markdown = db.TextProperty(required=True)  # raw Markdown source
    html = db.TextProperty(required=True)  # HTML rendered from markdown at save time
    published = db.DateTimeProperty(auto_now_add=True)  # set once on first put()
    updated = db.DateTimeProperty(auto_now=True)  # refreshed on every put()
+
+
def administrator(method):
    """Decorator restricting a handler method to signed-in site admins.

    Unauthorized GET requests are redirected (to login when anonymous,
    to "/" when signed in but not an admin); any other verb gets a 403.
    """
    @functools.wraps(method)
    def wrapper(self, *args, **kwargs):
        user = self.current_user
        if user and user.administrator:
            return method(self, *args, **kwargs)
        # Not authorized: browsers get a redirect, non-GET verbs a 403.
        if self.request.method != "GET":
            raise tornado.web.HTTPError(403)
        self.redirect(self.get_login_url() if not user else "/")
    return wrapper
+
+
class BaseHandler(tornado.web.RequestHandler):
    """Implements Google Accounts authentication methods."""

    def get_current_user(self):
        """Return the signed-in Google user (annotated with .administrator),
        or None when nobody is signed in."""
        user = users.get_current_user()
        if user:
            user.administrator = users.is_current_user_admin()
        return user

    def get_login_url(self):
        """Login URL that bounces back to the page being requested."""
        return users.create_login_url(self.request.uri)

    def render_string(self, template_name, **kwargs):
        """Render a template with the `users` module in scope so templates
        can generate login/logout URLs themselves."""
        return tornado.web.RequestHandler.render_string(
            self, template_name, users=users, **kwargs)
+
+
class HomeHandler(BaseHandler):
    # Front page: shows the five most recent entries, newest first.
    def get(self):
        entries = db.Query(Entry).order('-published').fetch(limit=5)
        if not entries:
            # Empty blog bootstrap: send the visitor to /compose so the first
            # post can be written. NOTE(review): because of the `or`, anonymous
            # visitors are redirected too (ComposeHandler's @administrator then
            # bounces them to the login page); only signed-in non-admins fall
            # through to the empty home page. Confirm this flow is intended.
            if not self.current_user or self.current_user.administrator:
                self.redirect("/compose")
                return
        self.render("home.html", entries=entries)
+
+
class EntryHandler(BaseHandler):
    """Displays a single entry looked up by its URL slug (404 if missing)."""
    def get(self, slug):
        entry = db.Query(Entry).filter("slug =", slug).get()
        if entry is None:
            raise tornado.web.HTTPError(404)
        self.render("entry.html", entry=entry)
+
+
class ArchiveHandler(BaseHandler):
    """Lists every entry, newest first."""
    def get(self):
        all_entries = db.Query(Entry).order('-published')
        self.render("archive.html", entries=all_entries)
+
+
class FeedHandler(BaseHandler):
    """Serves the ten most recent entries as an Atom feed."""
    def get(self):
        self.set_header("Content-Type", "application/atom+xml")
        recent = db.Query(Entry).order('-published').fetch(limit=10)
        self.render("feed.xml", entries=recent)
+
+
class ComposeHandler(BaseHandler):
    # Create a new entry or edit an existing one; admin-only via @administrator.
    @administrator
    def get(self):
        # "key" query param identifies an existing entry to edit;
        # absent -> render a blank compose form.
        key = self.get_argument("key", None)
        entry = Entry.get(key) if key else None
        self.render("compose.html", entry=entry)

    @administrator
    def post(self):
        key = self.get_argument("key", None)
        if key:
            # Edit path: update title/body; the slug is deliberately left
            # unchanged so existing entry URLs stay stable.
            entry = Entry.get(key)
            entry.title = self.get_argument("title")
            entry.markdown = self.get_argument("markdown")
            entry.html = markdown.markdown(self.get_argument("markdown"))
        else:
            # Create path: derive an ASCII, hyphen-separated slug from the
            # title (strip accents, collapse non-word runs).
            title = self.get_argument("title")
            slug = unicodedata.normalize("NFKD", title).encode(
                "ascii", "ignore")
            slug = re.sub(r"[^\w]+", " ", slug)
            slug = "-".join(slug.lower().strip().split())
            if not slug: slug = "entry"
            # Append "-2" until no other entry owns the slug.
            # NOTE(review): `key` is always falsy in this branch, so the
            # `str(existing.key()) == key` test can never be true (dead code).
            while True:
                existing = db.Query(Entry).filter("slug =", slug).get()
                if not existing or str(existing.key()) == key:
                    break
                slug += "-2"
            entry = Entry(
                author=self.current_user,
                title=title,
                slug=slug,
                markdown=self.get_argument("markdown"),
                html=markdown.markdown(self.get_argument("markdown")),
            )
        entry.put()
        self.redirect("/entry/" + entry.slug)
+
+
class EntryModule(tornado.web.UIModule):
    """UI module that renders one blog entry via its partial template."""
    def render(self, entry):
        template = "modules/entry.html"
        return self.render_string(template, entry=entry)
+
+
# Application-wide settings passed to the Tornado application; templates can
# read these (e.g. blog_title) and `ui_modules` wires up {% module Entry(...) %}.
settings = {
    "blog_title": u"Tornado Blog",
    "template_path": os.path.join(os.path.dirname(__file__), "templates"),
    "ui_modules": {"Entry": EntryModule},
    "xsrf_cookies": True,  # enable Tornado's XSRF form protection
}
# WSGI application mapping URL patterns to handlers (run via CGI on App Engine).
application = tornado.wsgi.WSGIApplication([
    (r"/", HomeHandler),
    (r"/archive", ArchiveHandler),
    (r"/feed", FeedHandler),
    (r"/entry/([^/]+)", EntryHandler),  # group(1) is the entry slug
    (r"/compose", ComposeHandler),
], **settings)
+
+
def main():
    """CGI entry point: hand the WSGI application to App Engine's runtime."""
    handler = wsgiref.handlers.CGIHandler()
    handler.run(application)


if __name__ == "__main__":
    main()
1,877 vendor/tornado-git/demos/appengine/markdown.py
@@ -0,0 +1,1877 @@
+#!/usr/bin/env python
+# Copyright (c) 2007-2008 ActiveState Corp.
+# License: MIT (http://www.opensource.org/licenses/mit-license.php)
+
+r"""A fast and complete Python implementation of Markdown.
+
+[from http://daringfireball.net/projects/markdown/]
+> Markdown is a text-to-HTML filter; it translates an easy-to-read /
+> easy-to-write structured text format into HTML. Markdown's text
+> format is most similar to that of plain text email, and supports
+> features such as headers, *emphasis*, code blocks, blockquotes, and
+> links.
+>
+> Markdown's syntax is designed not as a generic markup language, but
+> specifically to serve as a front-end to (X)HTML. You can use span-level
+> HTML tags anywhere in a Markdown document, and you can use block level
+> HTML tags (like <div> and <table> as well).
+
+Module usage:
+
+ >>> import markdown2
+ >>> markdown2.markdown("*boo!*") # or use `html = markdown_path(PATH)`
+ u'<p><em>boo!</em></p>\n'
+
+ >>> markdowner = Markdown()
+ >>> markdowner.convert("*boo!*")
+ u'<p><em>boo!</em></p>\n'
+ >>> markdowner.convert("**boom!**")
+ u'<p><strong>boom!</strong></p>\n'
+
+This implementation of Markdown implements the full "core" syntax plus a
+number of extras (e.g., code syntax coloring, footnotes) as described on
+<http://code.google.com/p/python-markdown2/wiki/Extras>.
+"""
+
# Short description string; presumably shown by the module's command-line
# interface (not visible in this chunk) -- TODO confirm.
cmdln_desc = """A fast and complete Python implementation of Markdown, a
text-to-HTML conversion tool for web writers.
"""
+
+# Dev Notes:
+# - There is already a Python markdown processor
+# (http://www.freewisdom.org/projects/python-markdown/).
+# - Python's regex syntax doesn't have '\z', so I'm using '\Z'. I'm
+# not yet sure if there implications with this. Compare 'pydoc sre'
+# and 'perldoc perlre'.
+
+__version_info__ = (1, 0, 1, 14) # first three nums match Markdown.pl
+__version__ = '1.0.1.14'
+__author__ = "Trent Mick"
+
+import os
+import sys
+from pprint import pprint
+import re
+import logging
+try:
+ from hashlib import md5
+except ImportError:
+ from md5 import md5
+import optparse
+from random import random
+import codecs
+
+
+
+#---- Python version compat
+
# Python 2.3 compatibility: backport `set`, `reversed`, and pick a unicode
# decoding helper appropriate for the interpreter version (Python 2 code:
# uses the `unicode` builtin).
if sys.version_info[:2] < (2,4):
    from sets import Set as set
    def reversed(sequence):
        # Minimal stand-in for the 2.4+ builtin; sequences only.
        for i in sequence[::-1]:
            yield i
    def _unicode_decode(s, encoding, errors='xmlcharrefreplace'):
        return unicode(s, encoding, errors)
else:
    def _unicode_decode(s, encoding, errors='strict'):
        # Decode a byte string to unicode.
        return s.decode(encoding, errors)
+
+
+#---- globals
+
DEBUG = False
log = logging.getLogger("markdown")

# Default number of spaces a tab expands to (see Markdown._detab).
DEFAULT_TAB_WIDTH = 4

# Table of hash values for escaped characters:
def _escape_hash(s):
    """Return a stable placeholder token for the escaped character *s*."""
    # Lame attempt to avoid possible collision with someone actually
    # using the MD5 hexdigest of one of these chars in their text.
    # Other ideas: random.random(), uuid.uuid()
    #return md5(s).hexdigest()   # Markdown.pl effectively does this.
    return 'md5-'+md5(s).hexdigest()
# Placeholder for every Markdown special character, keyed by the character.
g_escape_table = dict([(ch, _escape_hash(ch))
                       for ch in '\\`*_{}[]()>#+-.!'])
+
+
+
+#---- exceptions
+
class MarkdownError(Exception):
    """Raised for invalid converter configuration (e.g. a bad safe_mode)."""
    pass
+
+
+
+#---- public api
+
def markdown_path(path, encoding="utf-8",
                  html4tags=False, tab_width=DEFAULT_TAB_WIDTH,
                  safe_mode=None, extras=None, link_patterns=None,
                  use_file_vars=False):
    """Read the file at *path* (decoded with *encoding*) and return the
    HTML produced by converting its contents as Markdown."""
    source = codecs.open(path, 'r', encoding).read()
    converter = Markdown(html4tags=html4tags, tab_width=tab_width,
                         safe_mode=safe_mode, extras=extras,
                         link_patterns=link_patterns,
                         use_file_vars=use_file_vars)
    return converter.convert(source)
+
def markdown(text, html4tags=False, tab_width=DEFAULT_TAB_WIDTH,
             safe_mode=None, extras=None, link_patterns=None,
             use_file_vars=False):
    """Convert the Markdown string *text* to HTML with a one-shot converter."""
    options = dict(html4tags=html4tags, tab_width=tab_width,
                   safe_mode=safe_mode, extras=extras,
                   link_patterns=link_patterns,
                   use_file_vars=use_file_vars)
    return Markdown(**options).convert(text)
+
class Markdown(object):
    # The dict of "extras" to enable in processing -- a mapping of
    # extra name to argument for the extra. Most extras do not have an
    # argument, in which case the value is None.
    #
    # This can be set via (a) subclassing and (b) the constructor
    # "extras" argument.
    extras = None

    # Per-conversion state; reset() re-initializes these for each convert().
    urls = None
    titles = None
    html_blocks = None
    html_spans = None
    html_removed_text = "[HTML_REMOVED]" # for compat with markdown.py

    # Used to track when we're inside an ordered or unordered list
    # (see _ProcessListItems() for details):
    list_level = 0

    # Matches lines containing only spaces/tabs (blanked out in convert()).
    _ws_only_line_re = re.compile(r"^[ \t]+$", re.M)

    def __init__(self, html4tags=False, tab_width=4, safe_mode=None,
                 extras=None, link_patterns=None, use_file_vars=False):
        """Configure the converter; see the module docstring for options."""
        if html4tags:
            self.empty_element_suffix = ">"
        else:
            self.empty_element_suffix = " />"
        self.tab_width = tab_width

        # For compatibility with earlier markdown2.py and with
        # markdown.py's safe_mode being a boolean,
        #   safe_mode == True -> "replace"
        if safe_mode is True:
            self.safe_mode = "replace"
        else:
            self.safe_mode = safe_mode

        # Merge class-level extras (possibly set by a subclass) with the
        # constructor argument; both are normalized to {name: arg} dicts.
        if self.extras is None:
            self.extras = {}
        elif not isinstance(self.extras, dict):
            self.extras = dict([(e, None) for e in self.extras])
        if extras:
            if not isinstance(extras, dict):
                extras = dict([(e, None) for e in extras])
            self.extras.update(extras)
        assert isinstance(self.extras, dict)
        self._instance_extras = self.extras.copy()
        self.link_patterns = link_patterns
        self.use_file_vars = use_file_vars
        self._outdent_re = re.compile(r'^(\t|[ ]{1,%d})' % tab_width, re.M)

    def reset(self):
        """Clear per-conversion state so the instance can be reused."""
        self.urls = {}
        self.titles = {}
        self.html_blocks = {}
        self.html_spans = {}
        self.list_level = 0
        self.extras = self._instance_extras.copy()
        if "footnotes" in self.extras:
            self.footnotes = {}
            self.footnote_ids = []
+
    def convert(self, text):
        """Convert the given Markdown text; returns the HTML as unicode."""
        # Main function. The order in which other subs are called here is
        # essential. Link and image substitutions need to happen before
        # _EscapeSpecialChars(), so that any *'s or _'s in the <a>
        # and <img> tags get encoded.

        # Clear the global hashes. If we don't clear these, you get conflicts
        # from other articles when generating a page which contains more than
        # one article (e.g. an index page that shows the N most recent
        # articles):
        self.reset()

        if not isinstance(text, unicode):
            #TODO: perhaps shouldn't presume UTF-8 for string input?
            text = unicode(text, 'utf-8')

        if self.use_file_vars:
            # Look for emacs-style file variable hints.
            emacs_vars = self._get_emacs_vars(text)
            if "markdown-extras" in emacs_vars:
                splitter = re.compile("[ ,]+")
                for e in splitter.split(emacs_vars["markdown-extras"]):
                    if '=' in e:
                        ename, earg = e.split('=', 1)
                        try:
                            earg = int(earg)
                        except ValueError:
                            pass  # non-integer args stay as strings
                    else:
                        ename, earg = e, None
                    self.extras[ename] = earg

        # Standardize line endings:
        text = re.sub("\r\n|\r", "\n", text)

        # Make sure $text ends with a couple of newlines:
        text += "\n\n"

        # Convert all tabs to spaces.
        text = self._detab(text)

        # Strip any lines consisting only of spaces and tabs.
        # This makes subsequent regexen easier to write, because we can
        # match consecutive blank lines with /\n+/ instead of something
        # contorted like /[ \t]*\n+/ .
        text = self._ws_only_line_re.sub("", text)

        if self.safe_mode:
            text = self._hash_html_spans(text)

        # Turn block-level HTML blocks into hash entries
        text = self._hash_html_blocks(text, raw=True)

        # Strip link definitions, store in hashes.
        if "footnotes" in self.extras:
            # Must do footnotes first because an unlucky footnote defn
            # looks like a link defn:
            #   [^4]: this "looks like a link defn"
            text = self._strip_footnote_definitions(text)
        text = self._strip_link_definitions(text)

        text = self._run_block_gamut(text)

        if "footnotes" in self.extras:
            text = self._add_footnotes(text)

        text = self._unescape_special_chars(text)

        if self.safe_mode:
            text = self._unhash_html_spans(text)

        text += "\n"
        return text
+
    # Matches a one-line "-*- var: val; ... -*-" emacs file-variables header.
    _emacs_oneliner_vars_pat = re.compile(r"-\*-\s*([^\r\n]*?)\s*-\*-", re.UNICODE)
    # This regular expression is intended to match blocks like this:
    #    PREFIX Local Variables: SUFFIX
    #    PREFIX mode: Tcl SUFFIX
    #    PREFIX End: SUFFIX
    # Some notes:
    # - "[ \t]" is used instead of "\s" to specifically exclude newlines
    # - "(\r\n|\n|\r)" is used instead of "$" because the sre engine does
    #   not like anything other than Unix-style line terminators.
    _emacs_local_vars_pat = re.compile(r"""^
        (?P<prefix>(?:[^\r\n|\n|\r])*?)
        [\ \t]*Local\ Variables:[\ \t]*
        (?P<suffix>.*?)(?:\r\n|\n|\r)
        (?P<content>.*?\1End:)
        """, re.IGNORECASE | re.MULTILINE | re.DOTALL | re.VERBOSE)

    def _get_emacs_vars(self, text):
        """Return a dictionary of emacs-style local variables.

        Parsing is done loosely according to this spec (and according to
        some in-practice deviations from this):
        http://www.gnu.org/software/emacs/manual/html_node/emacs/Specifying-File-Variables.html#Specifying-File-Variables
        """
        emacs_vars = {}
        SIZE = pow(2, 13) # 8kB -- only scan the head and tail of the text

        # Search near the start for a '-*-'-style one-liner of variables.
        head = text[:SIZE]
        if "-*-" in head:
            match = self._emacs_oneliner_vars_pat.search(head)
            if match:
                emacs_vars_str = match.group(1)
                assert '\n' not in emacs_vars_str
                emacs_var_strs = [s.strip() for s in emacs_vars_str.split(';')
                                  if s.strip()]
                if len(emacs_var_strs) == 1 and ':' not in emacs_var_strs[0]:
                    # While not in the spec, this form is allowed by emacs:
                    #   -*- Tcl -*-
                    # where the implied "variable" is "mode". This form
                    # is only allowed if there are no other variables.
                    emacs_vars["mode"] = emacs_var_strs[0].strip()
                else:
                    for emacs_var_str in emacs_var_strs:
                        try:
                            variable, value = emacs_var_str.strip().split(':', 1)
                        except ValueError:
                            log.debug("emacs variables error: malformed -*- "
                                      "line: %r", emacs_var_str)
                            continue
                        # Lowercase the variable name because Emacs allows "Mode"
                        # or "mode" or "MoDe", etc.
                        emacs_vars[variable.lower()] = value.strip()

        # Search near the end for a multi-line "Local Variables:" block.
        tail = text[-SIZE:]
        if "Local Variables" in tail:
            match = self._emacs_local_vars_pat.search(tail)
            if match:
                prefix = match.group("prefix")
                suffix = match.group("suffix")
                lines = match.group("content").splitlines(0)
                #print "prefix=%r, suffix=%r, content=%r, lines: %s"\
                #      % (prefix, suffix, match.group("content"), lines)

                # Validate the Local Variables block: proper prefix and suffix
                # usage.
                for i, line in enumerate(lines):
                    if not line.startswith(prefix):
                        log.debug("emacs variables error: line '%s' "
                                  "does not use proper prefix '%s'"
                                  % (line, prefix))
                        return {}
                    # Don't validate suffix on last line. Emacs doesn't care,
                    # neither should we.
                    if i != len(lines)-1 and not line.endswith(suffix):
                        log.debug("emacs variables error: line '%s' "
                                  "does not use proper suffix '%s'"
                                  % (line, suffix))
                        return {}

                # Parse out one emacs var per line.
                continued_for = None
                for line in lines[:-1]: # no var on the last line ("PREFIX End:")
                    if prefix: line = line[len(prefix):] # strip prefix
                    if suffix: line = line[:-len(suffix)] # strip suffix
                    line = line.strip()
                    if continued_for:
                        # This line continues the value of the previous var.
                        variable = continued_for
                        if line.endswith('\\'):
                            line = line[:-1].rstrip()
                        else:
                            continued_for = None
                        emacs_vars[variable] += ' ' + line
                    else:
                        try:
                            variable, value = line.split(':', 1)
                        except ValueError:
                            log.debug("local variables error: missing colon "
                                      "in local variables entry: '%s'" % line)
                            continue
                        # Do NOT lowercase the variable name, because Emacs only
                        # allows "mode" (and not "Mode", "MoDe", etc.) in this block.
                        value = value.strip()
                        if value.endswith('\\'):
                            # Trailing backslash: value continues on next line.
                            value = value[:-1].rstrip()
                            continued_for = variable
                        else:
                            continued_for = None
                        emacs_vars[variable] = value

        # Unquote values.
        # NOTE(review): both disjuncts below test double quotes; the second
        # was presumably meant to test single quotes -- confirm against
        # upstream markdown2 before changing.
        for var, val in emacs_vars.items():
            if len(val) > 1 and (val.startswith('"') and val.endswith('"')
               or val.startswith('"') and val.endswith('"')):
                emacs_vars[var] = val[1:-1]

        return emacs_vars
+
    # Cribbed from a post by Bart Lateur:
    # <http://www.nntp.perl.org/group/perl.macperl.anyperl/154>
    _detab_re = re.compile(r'(.*?)\t', re.M)
    def _detab_sub(self, match):
        # Pad the text preceding the tab out to the next tab stop.
        g1 = match.group(1)
        return g1 + (' ' * (self.tab_width - len(g1) % self.tab_width))
    def _detab(self, text):
        r"""Remove (leading?) tabs from a file.

            >>> m = Markdown()
            >>> m._detab("\tfoo")
            '    foo'
            >>> m._detab("  \tfoo")
            '    foo'
            >>> m._detab("\t  foo")
            '      foo'
            >>> m._detab("  foo")
            '  foo'
            >>> m._detab("  foo\n\tbar\tblam")
            '  foo\n    bar    blam'
        """
        if '\t' not in text:
            return text  # fast path: nothing to expand
        return self._detab_re.subn(self._detab_sub, text)[0]
+
    # Block-level tags whose raw HTML should not be wrapped in <p> tags.
    _block_tags_a = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del'
    # Strict form: the closing tag must start its own line (supports nesting).
    _strict_tag_block_re = re.compile(r"""
        (                       # save in \1
        ^                       # start of line  (with re.M)
        <(%s)                   # start tag = \2
        \b                      # word break
        (.*\n)*?                # any number of lines, minimally matching
        </\2>                   # the matching end tag
        [ \t]*                  # trailing spaces/tabs
        (?=\n+|\Z)              # followed by a newline or end of document
        )
        """ % _block_tags_a,
        re.X | re.M)

    _block_tags_b = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math'
    # Liberal form: end tag may share a line with content (no ins/del).
    _liberal_tag_block_re = re.compile(r"""
        (                       # save in \1
        ^                       # start of line  (with re.M)
        <(%s)                   # start tag = \2
        \b                      # word break
        (.*\n)*?                # any number of lines, minimally matching
        .*</\2>                 # the matching end tag
        [ \t]*                  # trailing spaces/tabs
        (?=\n+|\Z)              # followed by a newline or end of document
        )
        """ % _block_tags_b,
        re.X | re.M)

    def _hash_html_block_sub(self, match, raw=False):
        """re.sub callback: stash the matched HTML block in self.html_blocks
        and return its placeholder key surrounded by blank lines."""
        html = match.group(1)
        if raw and self.safe_mode:
            html = self._sanitize_html(html)
        key = _hash_text(html)
        self.html_blocks[key] = html
        return "\n\n" + key + "\n\n"
+
    def _hash_html_blocks(self, text, raw=False):
        """Hashify HTML blocks

        We only want to do this for block-level HTML tags, such as headers,
        lists, and tables. That's because we still want to wrap <p>s around
        "paragraphs" that are wrapped in non-block-level tags, such as anchors,
        phrase emphasis, and spans. The list of tags we're looking for is
        hard-coded.

        @param raw {boolean} indicates if these are raw HTML blocks in
            the original source. It makes a difference in "safe" mode.
        """
        if '<' not in text:
            return text  # fast path: no markup at all

        # Pass `raw` value into our calls to self._hash_html_block_sub.
        hash_html_block_sub = _curry(self._hash_html_block_sub, raw=raw)

        # First, look for nested blocks, e.g.:
        #   <div>
        #       <div>
        #       tags for inner block must be indented.
        #       </div>
        #   </div>
        #
        # The outermost tags must start at the left margin for this to match, and
        # the inner nested divs must be indented.
        # We need to do this before the next, more liberal match, because the next
        # match will start at the first `<div>` and stop at the first `</div>`.
        text = self._strict_tag_block_re.sub(hash_html_block_sub, text)

        # Now match more liberally, simply from `\n<tag>` to `</tag>\n`
        text = self._liberal_tag_block_re.sub(hash_html_block_sub, text)

        # Special case just for <hr />. It was easier to make a special
        # case than to make the other regex more complicated.
        if "<hr" in text:
            _hr_tag_re = _hr_tag_re_from_tab_width(self.tab_width)
            text = _hr_tag_re.sub(hash_html_block_sub, text)

        # Special case for standalone HTML comments:
        if "<!--" in text:
            start = 0
            while True:
                # Delimiters for next comment block.
                # (Python 2 `except Class, name` syntax throughout.)
                try:
                    start_idx = text.index("<!--", start)
                except ValueError, ex:
                    break
                try:
                    end_idx = text.index("-->", start_idx) + 3
                except ValueError, ex:
                    break

                # Start position for next comment block search.
                start = end_idx

                # Validate whitespace before comment.
                if start_idx:
                    # - Up to `tab_width - 1` spaces before start_idx.
                    for i in range(self.tab_width - 1):
                        if text[start_idx - 1] != ' ':
                            break
                        start_idx -= 1
                        if start_idx == 0:
                            break
                    # - Must be preceded by 2 newlines or hit the start of
                    #   the document.
                    if start_idx == 0:
                        pass
                    elif start_idx == 1 and text[0] == '\n':
                        start_idx = 0  # to match minute detail of Markdown.pl regex
                    elif text[start_idx-2:start_idx] == '\n\n':
                        pass
                    else:
                        break

                # Validate whitespace after comment.
                # - Any number of spaces and tabs.
                while end_idx < len(text):
                    if text[end_idx] not in ' \t':
                        break
                    end_idx += 1
                # - Must be following by 2 newlines or hit end of text.
                if text[end_idx:end_idx+2] not in ('', '\n', '\n\n'):
                    continue

                # Escape and hash (must match `_hash_html_block_sub`).
                html = text[start_idx:end_idx]
                if raw and self.safe_mode:
                    html = self._sanitize_html(html)
                key = _hash_text(html)
                self.html_blocks[key] = html
                text = text[:start_idx] + "\n\n" + key + "\n\n" + text[end_idx:]

        if "xml" in self.extras:
            # Treat XML processing instructions and namespaced one-liner
            # tags as if they were block HTML tags. E.g., if standalone
            # (i.e. are their own paragraph), the following do not get
            # wrapped in a <p> tag:
            #    <?foo bar?>
            #
            #    <xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="chapter_1.md"/>
            _xml_oneliner_re = _xml_oneliner_re_from_tab_width(self.tab_width)
            text = _xml_oneliner_re.sub(hash_html_block_sub, text)

        return text
+
    def _strip_link_definitions(self, text):
        # Strips link definitions from text, stores the URLs and titles in
        # hash references (self.urls / self.titles).
        less_than_tab = self.tab_width - 1

        # Link defs are in the form:
        #   [id]: url "optional title"
        _link_def_re = re.compile(r"""
            ^[ ]{0,%d}\[(.+)\]: # id = \1
              [ \t]*
              \n?               # maybe *one* newline
              [ \t]*
            <?(.+?)>?           # url = \2
              [ \t]*
            (?:
                \n?             # maybe one newline
                [ \t]*
                (?<=\s)         # lookbehind for whitespace
                ['"(]
                ([^\n]*)        # title = \3
                ['")]
                [ \t]*
            )?  # title is optional
            (?:\n+|\Z)
            """ % less_than_tab, re.X | re.M | re.U)
        return _link_def_re.sub(self._extract_link_def_sub, text)

    def _extract_link_def_sub(self, match):
        """re.sub callback: record one [id]: url "title" definition and
        remove it from the text."""
        id, url, title = match.groups()
        key = id.lower()    # Link IDs are case-insensitive
        self.urls[key] = self._encode_amps_and_angles(url)
        if title:
            self.titles[key] = title.replace('"', '&quot;')
        return ""

    def _extract_footnote_def_sub(self, match):
        """re.sub callback: record one footnote definition (normalized id ->
        dedented text) and remove it from the text."""
        id, text = match.groups()
        text = _dedent(text, skip_first_line=not text.startswith('\n')).strip()
        normed_id = re.sub(r'\W', '-', id)
        # Ensure footnote text ends with a couple newlines (for some
        # block gamut matches).
        self.footnotes[normed_id] = text + "\n\n"
        return ""

    def _strip_footnote_definitions(self, text):
        """A footnote definition looks like this:

            [^note-id]: Text of the note.

                May include one or more indented paragraphs.

        Where,
        - The 'note-id' can be pretty much anything, though typically it
          is the number of the footnote.
        - The first paragraph may start on the next line, like so:

            [^note-id]:
                Text of the note.
        """
        less_than_tab = self.tab_width - 1
        footnote_def_re = re.compile(r'''
            ^[ ]{0,%d}\[\^(.+)\]:   # id = \1
            [ \t]*
            (                       # footnote text = \2
              # First line need not start with the spaces.
              (?:\s*.*\n+)
              (?:
                (?:[ ]{%d} | \t)  # Subsequent lines must be indented.
                .*\n+
              )*
            )
            # Lookahead for non-space at line-start, or end of doc.
            (?:(?=^[ ]{0,%d}\S)|\Z)
            ''' % (less_than_tab, self.tab_width, self.tab_width),
            re.X | re.M)
        return footnote_def_re.sub(self._extract_footnote_def_sub, text)
+
+
    # Horizontal-rule source lines: three or more *, -, or _ (optionally
    # space-separated), at most two leading spaces.
    _hr_res = [
        re.compile(r"^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$", re.M),
        re.compile(r"^[ ]{0,2}([ ]?\-[ ]?){3,}[ \t]*$", re.M),
        re.compile(r"^[ ]{0,2}([ ]?\_[ ]?){3,}[ \t]*$", re.M),
    ]

    def _run_block_gamut(self, text):
        """Apply every block-level transformation in its required order."""
        # These are all the transformations that form block-level
        # tags like paragraphs, headers, and list items.

        text = self._do_headers(text)

        # Do Horizontal Rules:
        hr = "\n<hr"+self.empty_element_suffix+"\n"
        for hr_re in self._hr_res:
            text = hr_re.sub(hr, text)

        text = self._do_lists(text)

        if "pyshell" in self.extras:
            text = self._prepare_pyshell_blocks(text)

        text = self._do_code_blocks(text)

        text = self._do_block_quotes(text)

        # We already ran _HashHTMLBlocks() before, in Markdown(), but that
        # was to escape raw HTML in the original Markdown source. This time,
        # we're escaping the markup we've just created, so that we don't wrap
        # <p> tags around block-level tags.
        text = self._hash_html_blocks(text)

        text = self._form_paragraphs(text)

        return text

    def _pyshell_block_sub(self, match):
        """re.sub callback: re-indent a >>> session so it becomes a code block."""
        lines = match.group(0).splitlines(0)
        _dedentlines(lines)
        indent = ' ' * self.tab_width
        s = ('\n' # separate from possible cuddled paragraph
             + indent + ('\n'+indent).join(lines)
             + '\n\n')
        return s

    def _prepare_pyshell_blocks(self, text):
        """Ensure that Python interactive shell sessions are put in
        code blocks -- even if not properly indented.
        """
        if ">>>" not in text:
            return text  # fast path: no shell sessions

        less_than_tab = self.tab_width - 1
        _pyshell_block_re = re.compile(r"""
            ^([ ]{0,%d})>>>[ ].*\n   # first line
            ^(\1.*\S+.*\n)*         # any number of subsequent lines
            ^\n                     # ends with a blank line
            """ % less_than_tab, re.M | re.X)

        return _pyshell_block_re.sub(self._pyshell_block_sub, text)
+
    def _run_span_gamut(self, text):
        """Apply every span-level transformation in its required order."""
        # These are all the transformations that occur *within* block-level
        # tags like paragraphs, headers, and list items.

        text = self._do_code_spans(text)

        text = self._escape_special_chars(text)

        # Process anchor and image tags.
        text = self._do_links(text)

        # Make links out of things like `<http://example.com/>`
        # Must come after _do_links(), because you can use < and >
        # delimiters in inline links like [this](<url>).
        text = self._do_auto_links(text)

        if "link-patterns" in self.extras:
            text = self._do_link_patterns(text)

        text = self._encode_amps_and_angles(text)

        text = self._do_italics_and_bold(text)

        # Do hard breaks:
        text = re.sub(r" {2,}\n", " <br%s\n" % self.empty_element_suffix, text)

        return text

    # "Sorta" because auto-links are identified as "tag" tokens.
    # Splitting on this yields alternating non-markup / markup tokens.
    _sorta_html_tokenize_re = re.compile(r"""
        (
            # tag
            </?
            (?:\w+)                                     # tag name
            (?:\s+(?:[\w-]+:)?[\w-]+=(?:".*?"|'.*?'))*  # attributes
            \s*/?>
            |
            # auto-link (e.g., <http://www.activestate.com/>)
            <\w+[^>]*>
            |
            <!--.*?-->      # comment
            |
            <\?.*?\?>       # processing instruction
        )
        """, re.X)

    def _escape_special_chars(self, text):
        """Hide Markdown special chars (* and _) inside HTML markup, and
        process backslash escapes in the remaining text."""
        # Python markdown note: the HTML tokenization here differs from
        # that in Markdown.pl, hence the behaviour for subtle cases can
        # differ (I believe the tokenizer here does a better job because
        # it isn't susceptible to unmatched '<' and '>' in HTML tags).
        # Note, however, that '>' is not allowed in an auto-link URL
        # here.
        escaped = []
        is_html_markup = False
        for token in self._sorta_html_tokenize_re.split(text):
            if is_html_markup:
                # Within tags/HTML-comments/auto-links, encode * and _
                # so they don't conflict with their use in Markdown for
                # italics and strong. We're replacing each such
                # character with its corresponding MD5 checksum value;
                # this is likely overkill, but it should prevent us from
                # colliding with the escape values by accident.
                escaped.append(token.replace('*', g_escape_table['*'])
                                    .replace('_', g_escape_table['_']))
            else:
                escaped.append(self._encode_backslash_escapes(token))
            is_html_markup = not is_html_markup
        return ''.join(escaped)
+
    def _hash_html_spans(self, text):
        """Replace inline HTML spans with placeholder keys (safe_mode only);
        auto-links are left intact. Reversed by _unhash_html_spans()."""
        # Used for safe_mode.

        def _is_auto_link(s):
            # Cheap substring checks before the (more expensive) regexes.
            if ':' in s and self._auto_link_re.match(s):
                return True
            elif '@' in s and self._auto_email_link_re.match(s):
                return True
            return False

        tokens = []
        is_html_markup = False
        for token in self._sorta_html_tokenize_re.split(text):
            if is_html_markup and not _is_auto_link(token):
                sanitized = self._sanitize_html(token)
                key = _hash_text(sanitized)
                self.html_spans[key] = sanitized
                tokens.append(key)
            else:
                tokens.append(token)
            is_html_markup = not is_html_markup
        return ''.join(tokens)
+
+ def _unhash_html_spans(self, text):
+ for key, sanitized in self.html_spans.items():
+ text = text.replace(key, sanitized)
+ return text
+
+ def _sanitize_html(self, s):
+ if self.safe_mode == "replace":
+ return self.html_removed_text
+ elif self.safe_mode == "escape":
+ replacements = [
+ ('&', '&amp;'),
+ ('<', '&lt;'),
+ ('>', '&gt;'),
+ ]
+ for before, after in replacements:
+ s = s.replace(before, after)
+ return s
+ else:
+ raise MarkdownError("invalid value for 'safe_mode': %r (must be "
+ "'escape' or 'replace')" % self.safe_mode)
+
    # Used by _do_links() to consume what follows "[text]" in the source.
    _tail_of_inline_link_re = re.compile(r'''
        # Match tail of: [text](/url/) or [text](/url/ "title")
        \(            # literal paren
          [ \t]*
          (?P<url>            # \1
              <.*?>
              |
              .*?
          )
          [ \t]*
          (                   # \2
            (['"])            # quote char = \3
            (?P<title>.*?)
            \3                # matching quote
          )?                  # title is optional
        \)
        ''', re.X | re.S)
    _tail_of_reference_link_re = re.compile(r'''
        # Match tail of: [text][id]
        [ ]?          # one optional space
        (?:\n[ ]*)?   # one optional newline followed by spaces
        \[
            (?P<id>.*?)
        \]
        ''', re.X | re.S)
+
+ def _do_links(self, text):
+ """Turn Markdown link shortcuts into XHTML <a> and <img> tags.
+
+ This is a combination of Markdown.pl's _DoAnchors() and
+ _DoImages(). They are done together because that simplified the
+ approach. It was necessary to use a different approach than
+ Markdown.pl because of the lack of atomic matching support in
+ Python's regex engine used in $g_nested_brackets.
+ """
+ MAX_LINK_TEXT_SENTINEL = 3000 # markdown2 issue 24
+
+ # `anchor_allowed_pos` is used to support img links inside
+ # anchors, but not anchors inside anchors. An anchor's start
+ # pos must be `>= anchor_allowed_pos`.
+ anchor_allowed_pos = 0
+
+ curr_pos = 0
+ while True: # Handle the next link.
+ # The next '[' is the start of:
+ # - an inline anchor: [text](url "title")
+ # - a reference anchor: [text][id]
+ # - an inline img: ![text](url "title")
+ # - a reference img: ![text][id]
+ # - a footnote ref: [^id]
+ # (Only if 'footnotes' extra enabled)
+ # - a footnote defn: [^id]: ...
+ # (Only if 'footnotes' extra enabled) These have already
+ # been stripped in _strip_footnote_definitions() so no
+ # need to watch for them.
+ # - a link definition: [id]: url "title"
+ # These have already been stripped in
+ # _strip_link_definitions() so no need to watch for them.
+ # - not markup: [...anything else...
+ try:
+ start_idx = text.index('[', curr_pos)
+ except ValueError:
+ break
+ text_length = len(text)
+
+ # Find the matching closing ']'.
+ # Markdown.pl allows *matching* brackets in link text so we
+ # will here too. Markdown.pl *doesn't* currently allow
+ # matching brackets in img alt text -- we'll differ in that
+ # regard.
+ bracket_depth = 0
+ for p in range(start_idx+1, min(start_idx+MAX_LINK_TEXT_SENTINEL,
+ text_length)):
+ ch = text[p]
+ if ch == ']':
+ bracket_depth -= 1
+ if bracket_depth < 0:
+ break
+ elif ch == '[':
+ bracket_depth += 1
+ else:
+ # Closing bracket not found within sentinel length.
+ # This isn't markup.
+ curr_pos = start_idx + 1
+ continue
+ link_text = text[start_idx+1:p]
+
+ # Possibly a footnote ref?
+ if "footnotes" in self.extras and link_text.startswith("^"):
+ normed_id = re.sub(r'\W', '-', link_text[1:])
+ if normed_id in self.footnotes:
+ self.footnote_ids.append(normed_id)
+ result = '<sup class="footnote-ref" id="fnref-%s">' \
+ '<a href="#fn-%s">%s</a></sup>' \
+ % (normed_id, normed_id, len(self.footnote_ids))
+ text = text[:start_idx] + result + text[p+1:]
+ else:
+ # This id isn't defined, leave the markup alone.
+ curr_pos = p+1
+ continue
+
+ # Now determine what this is by the remainder.
+ p += 1
+ if p == text_length:
+ return text
+
+ # Inline anchor or img?
+ if text[p] == '(': # attempt at perf improvement
+ match = self._tail_of_inline_link_re.match(text, p)
+ if match:
+ # Handle an inline anchor or img.
+ is_img = start_idx > 0 and text[start_idx-1] == "!"
+ if is_img:
+ start_idx -= 1
+
+ url, title = match.group("url"), match.group("title")
+ if url and url[0] == '<':
+ url = url[1:-1] # '<url>' -> 'url'
+ # We've got to encode these to avoid conflicting
+ # with italics/bold.
+ url = url.replace('*', g_escape_table['*']) \
+ .replace('_', g_escape_table['_'])
+ if title:
+ title_str = ' title="%s"' \
+ % title.replace('*', g_escape_table['*']) \
+ .replace('_', g_escape_table['_']) \
+ .replace('"', '&quot;')
+ else:
+ title_str = ''
+ if is_img:
+ result = '<img src="%s" alt="%s"%s%s' \
+ % (url, link_text.replace('"', '&quot;'),
+ title_str, self.empty_element_suffix)
+ curr_pos = start_idx + len(result)
+ text = text[:start_idx] + result + text[match.end():]
+ elif start_idx >= anchor_allowed_pos:
+ result_head = '<a href="%s"%s>' % (url, title_str)
+ result = '%s%s</a>' % (result_head, link_text)
+ # <img> allowed from curr_pos on, <a> from
+ # anchor_allowed_pos on.
+ curr_pos = start_idx + len(result_head)
+ anchor_allowed_pos = start_idx + len(result)
+ text = text[:start_idx] + result + text[match.end():]
+ else:
+ # Anchor not allowed here.
+ curr_pos = start_idx + 1
+ continue
+
+ # Reference anchor or img?
+ else:
+ match = self._tail_of_reference_link_re.match(text, p)
+ if match:
+ # Handle a reference-style anchor or img.
+ is_img = start_idx > 0 and text[start_idx-1] == "!"
+ if is_img:
+ start_idx -= 1
+ link_id = match.group("id").lower()
+ if not link_id:
+ link_id = link_text.lower() # for links like [this][]
+ if link_id in self.urls:
+ url = self.urls[link_id]
+ # We've got to encode these to avoid conflicting
+ # with italics/bold.
+ url = url.replace('*', g_escape_table['*']) \
+ .replace('_', g_escape_table['_'])
+ title = self.titles.get(link_id)
+ if title:
+ title = title.replace('*', g_escape_table['*']) \
+ .replace('_', g_escape_table['_'])
+ title_str = ' title="%s"' % title
+ else:
+ title_str = ''
+ if is_img:
+ result = '<img src="%s" alt="%s"%s%s' \
+ % (url, link_text.replace('"', '&quot;'),
+ title_str, self.empty_element_suffix)
+ curr_pos = start_idx + len(result)
+ text = text[:start_idx] + result + text[match.end():]
+ elif start_idx >= anchor_allowed_pos:
+ result = '<a href="%s"%s>%s</a>' \
+ % (url, title_str, link_text)
+ result_head = '<a href="%s"%s>' % (url, title_str)
+ result = '%s%s</a>' % (result_head, link_text)
+ # <img> allowed from curr_pos on, <a> from
+ # anchor_allowed_pos on.
+ curr_pos = start_idx + len(result_head)
+ anchor_allowed_pos = start_idx + len(result)
+ text = text[:start_idx] + result + text[match.end():]
+ else:
+ # Anchor not allowed here.
+ curr_pos = start_idx + 1
+ else:
+ # This id isn't defined, leave the markup alone.
+ curr_pos = match.end()
+ continue
+
+ # Otherwise, it isn't markup.
+ curr_pos = start_idx + 1
+
+ return text
+
+
+ _setext_h_re = re.compile(r'^(.+)[ \t]*\n(=+|-+)[ \t]*\n+', re.M)
+ def _setext_h_sub(self, match):
+ n = {"=": 1, "-": 2}[match.group(2)[0]]
+ demote_headers = self.extras.get("demote-headers")
+ if demote_headers:
+ n = min(n + demote_headers, 6)
+ return "<h%d>%s</h%d>\n\n" \
+ % (n, self._run_span_gamut(match.group(1)), n)
+
+ _atx_h_re = re.compile(r'''
+ ^(\#{1,6}) # \1 = string of #'s
+ [ \t]*
+ (.+?) # \2 = Header text
+ [ \t]*
+ (?<!\\) # ensure not an escaped trailing '#'
+ \#* # optional closing #'s (not counted)
+ \n+
+ ''', re.X | re.M)
+ def _atx_h_sub(self, match):
+ n = len(match.group(1))
+ demote_headers = self.extras.get("demote-headers")
+ if demote_headers:
+ n = min(n + demote_headers, 6)
+ return "<h%d>%s</h%d>\n\n" \
+ % (n, self._run_span_gamut(match.group(2)), n)
+
+ def _do_headers(self, text):
+ # Setext-style headers:
+ # Header 1
+ # ========
+ #
+ # Header 2
+ # --------
+ text = self._setext_h_re.sub(self._setext_h_sub, text)
+
+ # atx-style headers:
+ # # Header 1
+ # ## Header 2
+ # ## Header 2 with closing hashes ##
+ # ...
+ # ###### Header 6
+ text = self._atx_h_re.sub(self._atx_h_sub, text)
+
+ return text
+
+
+ _marker_ul_chars = '*+-'
+ _marker_any = r'(?:[%s]|\d+\.)' % _marker_ul_chars
+ _marker_ul = '(?:[%s])' % _marker_ul_chars
+ _marker_ol = r'(?:\d+\.)'
+
+ def _list_sub(self, match):
+ lst = match.group(1)
+ lst_type = match.group(3) in self._marker_ul_chars and "ul" or "ol"
+ result = self._process_list_items(lst)
+ if self.list_level:
+ return "<%s>\n%s</%s>\n" % (lst_type, result, lst_type)
+ else:
+ return "<%s>\n%s</%s>\n\n" % (lst_type, result, lst_type)
+
+ def _do_lists(self, text):
+ # Form HTML ordered (numbered) and unordered (bulleted) lists.
+
+ for marker_pat in (self._marker_ul, self._marker_ol):
+ # Re-usable pattern to match any entire ul or ol list:
+ less_than_tab = self.tab_width - 1
+ whole_list = r'''
+ ( # \1 = whole list
+ ( # \2
+ [ ]{0,%d}
+ (%s) # \3 = first list item marker
+ [ \t]+
+ )
+ (?:.+?)
+ ( # \4
+ \Z
+ |
+ \n{2,}
+ (?=\S)
+ (?! # Negative lookahead for another list item marker
+ [ \t]*
+ %s[ \t]+
+ )
+ )
+ )
+ ''' % (less_than_tab, marker_pat, marker_pat)
+
+ # We use a different prefix before nested lists than top-level lists.
+ # See extended comment in _process_list_items().
+ #
+ # Note: There's a bit of duplication here. My original implementation
+ # created a scalar regex pattern as the conditional result of the test on
+ # $g_list_level, and then only ran the $text =~ s{...}{...}egmx
+ # substitution once, using the scalar as the pattern. This worked,
+ # everywhere except when running under MT on my hosting account at Pair
+ # Networks. There, this caused all rebuilds to be killed by the reaper (or
+ # perhaps they crashed, but that seems incredibly unlikely given that the
+ # same script on the same server ran fine *except* under MT. I've spent
+ # more time trying to figure out why this is happening than I'd like to
+ # admit. My only guess, backed up by the fact that this workaround works,
+ # is that Perl optimizes the substition when it can figure out that the
+ # pattern will never change, and when this optimization isn't on, we run
+ # afoul of the reaper. Thus, the slightly redundant code to that uses two
+ # static s/// patterns rather than one conditional pattern.
+
+ if self.list_level:
+ sub_list_re = re.compile("^"+whole_list, re.X | re.M | re.S)
+ text = sub_list_re.sub(self._list_sub, text)
+ else:
+ list_re = re.compile(r"(?:(?<=\n\n)|\A\n?)"+whole_list,
+ re.X | re.M | re.S)
+ text = list_re.sub(self._list_sub, text)
+
+ return text
+
    _list_item_re = re.compile(r'''
        (\n)?               # leading line = \1
        (^[ \t]*)           # leading whitespace = \2
        (%s) [ \t]+         # list marker = \3
        ((?:.+?)            # list item text = \4
         (\n{1,2}))         # eols = \5
        (?= \n* (\Z | \2 (%s) [ \t]+))
        ''' % (_marker_any, _marker_any),
        re.M | re.X | re.S)

    # State carried between successive _list_item_sub() calls during one
    # _process_list_items() pass: True when the previous item ended with
    # a blank line (two EOLs), which forces the *next* item to be
    # rendered block-level as well.
    _last_li_endswith_two_eols = False
    def _list_item_sub(self, match):
        """re.sub callback: convert one matched list item to <li>...</li>.

        A "loose" item (one preceded by a blank line, containing a blank
        line, or following a loose item) is run through the full block
        gamut; a "tight" item only gets span-level processing, with
        recursion into _do_lists() for nested lists.
        """
        item = match.group(4)
        leading_line = match.group(1)
        # NOTE(review): leading_space is captured but never used here;
        # kept as-is to leave the code byte-identical.
        leading_space = match.group(2)
        if leading_line or "\n\n" in item or self._last_li_endswith_two_eols:
            item = self._run_block_gamut(self._outdent(item))
        else:
            # Recursion for sub-lists:
            item = self._do_lists(self._outdent(item))
            if item.endswith('\n'):
                item = item[:-1]
            item = self._run_span_gamut(item)
        # Record whether this item ended with a blank line so the next
        # _list_item_sub() call can see it (read above, written below --
        # the order matters).
        self._last_li_endswith_two_eols = (len(match.group(5)) == 2)
        return "<li>%s</li>\n" % item
+
+ def _process_list_items(self, list_str):
+ # Process the contents of a single ordered or unordered list,
+ # splitting it into individual list items.
+
+ # The $g_list_level global keeps track of when we're inside a list.
+ # Each time we enter a list, we increment it; when we leave a list,
+ # we decrement. If it's zero, we're not in a list anymore.
+ #
+ # We do this because when we're not inside a list, we want to treat
+ # something like this:
+ #
+ # I recommend upgrading to version
+ # 8. Oops, now this line is treated
+ # as a sub-list.
+ #
+ # As a single paragraph, despite the fact that the second line starts
+ # with a digit-period-space sequence.
+ #
+ # Whereas when we're inside a list (or sub-list), that line will be
+ # treated as the start of a sub-list. What a kludge, huh? This is
+ # an aspect of Markdown's syntax that's hard to parse perfectly
+ # without resorting to mind-reading. Perhaps the solution is to
+ # change the syntax rules such that sub-lists must start with a
+ # starting cardinal number; e.g. "1." or "a.".
+ self.list_level += 1
+ self._last_li_endswith_two_eols = False
+ list_str = list_str.rstrip('\n') + '\n'
+ list_str = self._list_item_re.sub(self._list_item_sub, list_str)
+ self.list_level -= 1
+ return list_str
+
+ def _get_pygments_lexer(self, lexer_name):
+ try:
+ from pygments import lexers, util
+ except ImportError:
+ return None
+ try:
+ return lexers.get_lexer_by_name(lexer_name)
+ except util.ClassNotFound:
+ return None
+
    def _color_with_pygments(self, codeblock, lexer, **formatter_opts):
        # Syntax-highlight `codeblock` with Pygments using the given
        # `lexer`.  `formatter_opts` are passed straight through to
        # Pygments' HtmlFormatter; the output carries the "codehilite"
        # CSS class.  Returns the highlighted HTML string.
        import pygments
        import pygments.formatters

        # Subclass HtmlFormatter only to add a <code> wrapper inside the
        # usual div/pre wrapping.
        class HtmlCodeFormatter(pygments.formatters.HtmlFormatter):
            def _wrap_code(self, inner):
                """A function for use in a Pygments Formatter which
                wraps in <code> tags.
                """
                yield 0, "<code>"
                for tup in inner:
                    yield tup
                yield 0, "</code>"

            def wrap(self, source, outfile):
                """Return the source with a code, pre, and div."""
                return self._wrap_div(self._wrap_pre(self._wrap_code(source)))

        formatter = HtmlCodeFormatter(cssclass="codehilite", **formatter_opts)
        return pygments.highlight(codeblock, lexer, formatter)
+
+ def _code_block_sub(self, match):
+ codeblock = match.group(1)
+ codeblock = self._outdent(codeblock)
+ codeblock = self._detab(codeblock)
+ codeblock = codeblock.lstrip('\n') # trim leading newlines
+ codeblock = codeblock.rstrip() # trim trailing whitespace
+
+ if "code-color" in self.extras and codeblock.startswith(":::"):
+ lexer_name, rest = codeblock.split('\n', 1)
+ lexer_name = lexer_name[3:].strip()
+ lexer = self._get_pygments_lexer(lexer_name)
+ codeblock = rest.lstrip("\n") # Remove lexer declaration line.
+ if lexer:
+ formatter_opts = self.extras['code-color'] or {}
+ colored = self._color_with_pygments(codeblock, lexer,
+ **formatter_opts)
+ return "\n\n%s\n\n" % colored
+
+ codeblock = self._encode_code(codeblock)
+ return "\n\n<pre><code>%s\n</code></pre>\n\n" % codeblock
+
+ def _do_code_blocks(self, text):
+ """Process Markdown `<pre><code>` blocks."""
+ code_block_re = re.compile(r'''
+ (?:\n\n|\A)
+ ( # $1 = the code block -- one or more lines, starting with a space/tab
+ (?:
+ (?:[ ]{%d} | \t) # Lines must start with a tab or a tab-width of spaces
+ .*\n+
+ )+
+ )
+ ((?=^[ ]{0,%d}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
+ ''' % (self.tab_width, self.tab_width),
+ re.M | re.X)
+
+ return code_block_re.sub(self._code_block_sub, text)
+
+
+ # Rules for a code span:
+ # - backslash escapes are not interpreted in a code span
+ # - to include one or or a run of more backticks the delimiters must
+ # be a longer run of backticks
+ # - cannot start or end a code span with a backtick; pad with a
+ # space and that space will be removed in the emitted HTML
+ # See `test/tm-cases/escapes.text` for a number of edge-case
+ # examples.
+ _code_span_re = re.compile(r'''
+ (?<!\\)
+ (`+) # \1 = Opening run of `
+ (?!`) # See Note A test/tm-cases/escapes.text
+ (.+?) # \2 = The code block
+ (?<!`)
+ \1 # Matching closer
+ (?!`)
+ ''', re.X | re.S)
+
+ def _code_span_sub(self, match):
+ c = match.group(2).strip(" \t")
+ c = self._encode_code(c)
+ return "<code>%s</code>" % c
+
+ def _do_code_spans(self, text):
+ # * Backtick quotes are used for <code></code> spans.
+ #
+ # * You can use multiple backticks as the delimiters if you want to
+ # include literal backticks in the code span. So, this input:
+ #
+ # Just type ``foo `bar` baz`` at the prompt.
+ #
+ # Will translate to:
+ #
+ # <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
+ #
+ # There's no arbitrary limit to the number of backticks you
+ # can use as delimters. If you need three consecutive backticks
+ # in your code, use four for delimiters, etc.
+ #
+ # * You can use spaces to get literal backticks at the edges:
+ #
+ # ... type `` `bar` `` ...
+ #
+ # Turns to:
+ #
+ # ... type <code>`bar`</code> ...
+ return self._code_span_re.sub(self._code_span_sub, text)
+
+ def _encode_code(self, text):
+ """Encode/escape certain characters inside Markdown code runs.
+ The point is that in code, these characters are literals,
+ and lose their special Markdown meanings.
+ """
+ replacements = [
+ # Encode all ampersands; HTML entities are not
+ # entities within a Markdown code span.
+ ('&', '&amp;'),
+ # Do the angle bracket song and dance:
+ ('<', '&lt;'),
+ ('>', '&gt;'),
+ # Now, escape characters that are magic in Markdown:
+ ('*', g_escape_table['*']),
+ ('_', g_escape_table['_']),
+ ('{', g_escape_table['{']),