Permalink
Browse files

Merge pull request #89 from mollyproject/batch-to-celery

Batch processing replaced with Celery tasks.
  • Loading branch information...
davbo committed May 3, 2012
2 parents 3d7fcb6 + e8042f1 commit 7e5ae8ffd25a19a7fab80549d6f2e5effdf32a53
Showing with 418 additions and 727 deletions.
  1. +24 −0 docs/source/getting-started/configuring.txt
  2. +23 −4 docs/source/getting-started/installing.txt
  3. +0 −13 docs/source/ref/batch-processing.txt
  4. +39 −10 docs/source/topics/application-framework.txt
  5. +3 −2 molly/apps/contact/providers/__init__.py
  6. +3 −1 molly/apps/feeds/providers/__init__.py
  7. +7 −17 molly/apps/feeds/providers/ical.py
  8. +7 −18 molly/apps/feeds/providers/rss.py
  9. +7 −17 molly/apps/feeds/providers/talks_cam.py
  10. +3 −4 molly/apps/home/views.py
  11. +5 −3 molly/apps/library/providers/__init__.py
  12. +5 −2 molly/apps/places/providers/__init__.py
  13. +8 −13 molly/apps/places/providers/acislive.py
  14. +6 −6 molly/apps/places/providers/atcocif.py
  15. +4 −3 molly/apps/places/providers/bbc_tpeg.py
  16. +4 −5 molly/apps/places/providers/cif.py
  17. +5 −4 molly/apps/places/providers/naptan.py
  18. +12 −10 molly/apps/places/providers/osm.py
  19. +4 −3 molly/apps/places/providers/postcodes.py
  20. +4 −2 molly/apps/podcasts/providers/__init__.py
  21. +5 −9 molly/apps/podcasts/providers/opml.py
  22. +4 −4 molly/apps/podcasts/providers/pp.py
  23. +6 −5 molly/apps/podcasts/providers/rss.py
  24. +2 −2 molly/apps/podcasts/tests.py
  25. +3 −2 molly/apps/search/providers/__init__.py
  26. +2 −1 molly/apps/service_status/providers/rss_module.py
  27. +5 −2 molly/apps/transport/providers/__init__.py
  28. +4 −7 molly/apps/weather/providers/bbc.py
  29. +0 −222 molly/batch_processing/__init__.py
  30. +0 −23 molly/batch_processing/admin.py
  31. 0 molly/batch_processing/management/__init__.py
  32. 0 molly/batch_processing/management/commands/__init__.py
  33. +0 −70 molly/batch_processing/management/commands/create_crontab.py
  34. +0 −11 molly/batch_processing/management/commands/run_batch.py
  35. +0 −53 molly/batch_processing/migrations/0001_initial.py
  36. +0 −40 molly/batch_processing/migrations/0002_auto__add_field_batch_last_run_failed.py
  37. 0 molly/batch_processing/migrations/__init__.py
  38. +0 −98 molly/batch_processing/models.py
  39. +0 −8 molly/batch_processing/scripts/run_batch.py
  40. +15 −3 molly/commands/site_template/settings.py
  41. +18 −0 molly/conf/admin.py
  42. +32 −0 molly/conf/celery_util.py
  43. +1 −1 molly/conf/default_settings.py
  44. +128 −0 molly/conf/provider.py
  45. +6 −15 molly/conf/settings.py
  46. +5 −2 molly/geolocation/providers/__init__.py
  47. +4 −0 molly/urls.py
  48. +2 −11 molly/utils/management/commands/deploy.py
  49. +3 −1 setup.py
@@ -262,6 +262,30 @@ The following settings will make this "just work" with Molly::
PIPELINE_CSS_COMPRESSOR = 'molly.utils.compress.MollyCSSFilter'
PIPELINE_JS_COMPRESSOR = 'pipeline.compressors.jsmin.JSMinCompressor'
+Celery settings
+^^^^^^^^^^^^^^^
+.. versionadded:: 1.4
+
+We include a few sane defaults for running Celery. These are::
+
+ BROKER_URL = "amqp://molly:molly@localhost:5672//"
+ CELERYBEAT_SCHEDULER = "djcelery.schedulers.DatabaseScheduler"
+ CELERYD_CONCURRENCY = 1
+ CELERY_RETRY_DELAY = 3 * 60
+ CELERY_MAX_RETRIES = 3
+
+The only setting you should worry about here is the `BROKER_URL`. This setting
+is passed from Celery to the transport layer library Kombu, which has excellent
+`documentation for the possible broker options`__ and their limitations. The
+default we provide will attempt to use RabbitMQ with vhost `molly`, on
+`localhost`, connecting as user `molly`.
+
+__ http://ask.github.com/kombu/userguide/connections.html#urls
+
+Remaining options are best explained in the `Celery documentation`__.
+
+__ http://docs.celeryproject.org/en/2.4/configuration.html
+
Molly settings
^^^^^^^^^^^^^^
@@ -16,14 +16,14 @@ In order to install Molly, there are several non-Python dependencies which are n
Fedora
""""""
-.. code-block ::
+.. code-block:: none
su -c 'yum install python-pip libxml-devel libxslt-devel python-devel postgresql-devel openldap-devel openssl-devel gdal-python proj geos libjpeg-devel imagemagick gcc make git libyaml'
CentOS
""""""
-.. code-block ::
+.. code-block:: none
su -c 'yum install python-pip python26 python-setuptools python26-devel binutils libxslt-devel cyrus-sasl-devel openldap-devel ImageMagick proj proj-devel postgresql-devel postgresql-contrib geos-3.1.0 geos-devel-3.1.0 gdal libjpeg-devel make gcc openssl-devel libyaml-devel'
@@ -34,13 +34,13 @@ Ubuntu/Debian
of libgeos, libgdal and postgresql which changes the package name. The
versions below are for Ubuntu 10.04.
-.. code-block ::
+.. code-block:: none
sudo apt-get install python-pip build-essential python-gdal proj libgeos-3.1.0 binutils libgdal1-1.6.0 postgresql-server-dev-8.4 python-setuptools python-dev libxslt-dev libldap2-dev libsasl2-dev libjpeg-dev imagemagick libyaml
.. note:: Versions below are for Ubuntu 10.11
-.. code-block ::
+.. code-block:: none
sudo apt-get install python-pip build-essential python-gdal proj libgeos-3.2.2 binutils libgdal1-1.7.0 postgresql-server-dev-8.4 python-setuptools python-dev libxslt1-dev libldap2-dev libsasl2-dev libjpeg62-dev imagemagick python-yaml
@@ -219,6 +219,25 @@ You will also have a ``compiled_media`` folder, which should be ignored, and a
``site_media`` folder, which is where you should put any overrides for media on
your site.
+Running Celery
+--------------
+
+.. versionadded:: 1.4
+
+Molly now runs its periodic tasks (e.g. importing map data from OpenStreetMap)
+using the popular distributed task queue, `Celery <http://celeryproject.org>`_.
+
+Celery requires us to install a *message broker*; the most popular choice here is
+`RabbitMQ <http://www.rabbitmq.com>`_. There are other brokers available; as always,
+we recommend reviewing the excellent Celery documentation to learn more.
+
+Molly's installation will have set up the Celery worker, ``celeryd``, and the task
+scheduler, ``celerybeat``, for us. We just have to start them::
+
+ python manage.py celeryd
+ python manage.py celerybeat
+
+
Deploying Molly
---------------
@@ -1,13 +0,0 @@
-:mod:`molly.batch_processing` -- Batch Processing
-=================================================
-
-.. module :: molly.batch_processing
-
-This allows for other applications to set up jobs to run on a schedule.
-
-Configuration
--------------
-
-This application has no configuration, and is not added as an ``Application``
-to the ``APPLICATIONS`` list in ``settings.py``, it should be added to the
-``INSTALLED_APPS`` list like a standard Django app
@@ -204,20 +204,49 @@ A provider maps an external interface onto the model used by the application.
Most applications provide a ``providers.BaseProvider`` class which specifies
an interface to be implemented by a provider for that application.
-Extra base classes
-------------------
+.. versionadded:: 1.4
+ Providers now all subclass ``molly.conf.provider.Provider``
+.. Extra base classes
+.. ------------------ TODO?
+Task Processing
+---------------
+.. versionadded:: 1.4
-Batch jobs
-----------
+Celery is used to provide asynchronous task processing. For an introduction to
+the basics of Celery we recommend you take a look at the `"Getting Started with
+Celery"`__ guide.
-A provider can annotate methods to be included in a crontab using the
-:meth:`molly.conf.settings.batch` decorator::
+__ http://docs.celeryproject.org/en/latest/getting-started/index.html
- @batch('%d 9 * * mon' % random.randint(0, 59))
- def import_data(self, metadata, output):
- # do stuff
+Molly uses a modified version of the Celery task decorator, located in
+``molly.conf.provider.task``. It should be used in a similar way to the previous
+``@batch`` decorator, to identify any methods on a provider to run asynchronously via Celery.
+
+See this (simplified) example from ``molly.apps.feeds.providers.rss``::
+
+ @task(run_every=timedelta(minutes=60))
+ def import_data(self, **metadata):
+ """
+ Pulls RSS feeds
+ """
+
+ from molly.apps.feeds.models import Feed
+ for feed in Feed.objects.filter(provider=self.class_path):
+ logger.debug("Importing: %s - %s" % (feed.title, feed.rss_url))
+ self.import_feed.delay(feed)
return metadata
-For more information, see :doc:`../ref/batch-processing`.
+ # Override CELERY_RETRY_DELAY and CELERY_MAX_RETRIES
+ @task(default_retry_delay=5, max_retries=2)
+ def import_feed(self, feed):
+ from molly.apps.feeds.models import Item
+ feed_data = feedparser.parse(feed.rss_url)
+ # Do stuff with feed_data
+
+We can iterate through all feeds and launch tasks to import them asynchronously
+using ``task.delay()``. This convention has been applied throughout all the
+standard providers packaged with Molly. Note that ``default_retry_delay`` and ``max_retries``
+are overridden on ``import_feed``. This means each feed import will be retried at most 2 times,
+with 5 seconds between each of those retries.
@@ -1,7 +1,8 @@
+from molly.conf.provider import Provider
from molly.apps.contact.forms import GenericContactForm
from django.utils.translation import ugettext as _
-class BaseContactProvider(object):
+class BaseContactProvider(Provider):
class NoSuchResult(KeyError):
pass
@@ -88,4 +89,4 @@ class TooManyResults(Exception):
pass
else:
del ldap
- from mit import LDAPContactProvider
+ from mit import LDAPContactProvider
@@ -1,4 +1,6 @@
-class BaseFeedsProvider(object):
+from molly.conf.provider import Provider
+
+class BaseFeedsProvider(Provider):
pass
from rss import RSSFeedsProvider
@@ -1,15 +1,13 @@
-from datetime import datetime
+from datetime import datetime, timedelta
import urllib2
-import random
-import traceback
import logging
from icalendar import Calendar
from icalendar.prop import vDatetime, vDate, vText
import socket
socket.setdefaulttimeout(5)
from molly.external_media import sanitise_html
-from molly.conf.settings import batch
+from molly.conf.provider import task
from molly.apps.feeds.providers import BaseFeedsProvider
@@ -26,26 +24,18 @@ class ICalFeedsProvider(BaseFeedsProvider):
"""
verbose_name = 'iCal'
- @batch('%d * * * *' % random.randint(0, 59))
- def import_data(self, metadata, output):
+ @task(run_every=timedelta(minutes=60))
+ def import_data(self, **metadata):
"""
Pulls iCal feeds
"""
-
from molly.apps.feeds.models import Feed
for feed in Feed.objects.filter(provider=self.class_path):
- output.write("Importing %s\n" % feed.title)
- try:
- self.import_feed(feed)
- except Exception, e:
- output.write("Error importing %s\n" % feed.title)
- traceback.print_exc(file=output)
- output.write('\n')
- logger.warn("Error importing feed %r" % feed.title,
- exc_info=True, extra={'url': feed.rss_url})
-
+ logger.debug("Importing: %s - %s" % (feed.title, feed.rss_url))
+ self.import_feed.delay(feed)
return metadata
+ @task()
def import_feed(self, feed):
from molly.apps.feeds.models import Item, vCard
@@ -1,14 +1,12 @@
-from datetime import datetime
+from datetime import datetime, timedelta
import feedparser
import time
-import random
-import traceback
import logging
import socket
socket.setdefaulttimeout(5)
from molly.external_media import sanitise_html
-from molly.conf.settings import batch
+from molly.conf.provider import task
from molly.apps.feeds.providers import BaseFeedsProvider
@@ -28,26 +26,19 @@ def struct_to_datetime(s):
class RSSFeedsProvider(BaseFeedsProvider):
verbose_name = 'RSS'
- @batch('%d * * * *' % random.randint(0, 59))
- def import_data(self, metadata, output):
+ @task(run_every=timedelta(minutes=60))
+ def import_data(self, **metadata):
"""
Pulls RSS feeds
"""
from molly.apps.feeds.models import Feed
for feed in Feed.objects.filter(provider=self.class_path):
- output.write("Importing %s\n" % feed.title)
- try:
- self.import_feed(feed)
- except Exception, e:
- output.write("Error importing %s\n" % feed.title)
- traceback.print_exc(file=output)
- output.write('\n')
- logger.warn("Error importing feed %r" % feed.title,
- exc_info=True, extra={'url': feed.rss_url})
-
+ logger.debug("Importing: %s - %s" % (feed.title, feed.rss_url))
+ self.import_feed.delay(feed)
return metadata
+ @task(default_retry_delay=5, max_retries=3)
def import_feed(self, feed):
from molly.apps.feeds.models import Item
@@ -99,5 +90,3 @@ def import_feed(self, feed):
for item in Item.objects.filter(feed=feed):
if item not in items:
item.delete()
-
- return items
@@ -1,14 +1,12 @@
-from datetime import datetime
+from datetime import datetime, timedelta
from lxml import etree
import urllib2
-import random
-import traceback
import logging
import socket
socket.setdefaulttimeout(5)
from molly.external_media import sanitise_html
-from molly.conf.settings import batch
+from molly.conf.provider import task
from molly.apps.feeds.providers import BaseFeedsProvider
@@ -20,26 +18,18 @@
class TalksCamFeedsProvider(BaseFeedsProvider):
verbose_name = 'TalksCam'
- @batch('%d * * * *' % random.randint(0, 59))
- def import_data(self, metadata, output):
+ @task(run_every=timedelta(minutes=60))
+ def import_data(self, **metadata):
"""
Pulls TalksCam feeds
"""
-
from molly.apps.feeds.models import Feed
for feed in Feed.objects.filter(provider=self.class_path):
- output.write("Importing %s\n" % feed.title)
- try:
- self.import_feed(feed)
- except Exception, e:
- output.write("Error importing %s\n" % feed.title)
- traceback.print_exc(file=output)
- output.write('\n')
- logger.warn("Error importing feed %r" % feed.title,
- exc_info=True, extra={'url': feed.rss_url})
-
+ logger.debug("Importing: %s - %s" % (feed.title, feed.rss_url))
+ self.import_feed.delay(feed)
return metadata
+ @task()
def import_feed(self, feed):
from molly.apps.feeds.models import Item, vCard
View
@@ -12,8 +12,7 @@
from molly.utils.breadcrumbs import *
from molly.favourites import get_favourites
from molly.wurfl import device_parents
-from molly import conf
-from molly.conf.applications import app_by_application_name, has_app_by_application_name
+from molly.conf.applications import app_by_application_name, has_app_by_application_name, has_app, all_apps
from molly.apps.weather.models import Weather
from models import UserMessage
@@ -47,7 +46,7 @@ def handle_GET(self, request, context):
and not request.GET.get('preview') == 'true'
and not internal_referer
and not settings.DEBUG
- and conf.has_app('molly.apps.desktop')
+ and has_app('molly.apps.desktop')
and request.REQUEST.get('format') is None):
return self.redirect(reverse('desktop:index'), request)
@@ -83,7 +82,7 @@ def handle_GET(self, request, context):
'url': reverse('%s:index' % app.local_name) \
if app.has_urlconf else None,
'display_to_user': app.display_to_user,
- } for app in conf.all_apps()]
+ } for app in all_apps()]
# Add accesskeys to the first 9 apps to be displayed to the user
for i, app in enumerate(
@@ -1,4 +1,6 @@
-class BaseLibrarySearchProvider(object):
+from molly.conf.provider import Provider
+
+class BaseLibrarySearchProvider(Provider):
"""
Abstract class implementing the interface for a provider for the library app
"""
@@ -23,7 +25,7 @@ def control_number_search(self, control_number):
from z3950 import Z3950
-class BaseMetadataProvider(object):
+class BaseMetadataProvider(Provider):
"""
Abstract class implementing the interface for a provider which fetches
book covers
@@ -37,4 +39,4 @@ def annotate(self, books):
"""
pass
-from google import GoogleBooksProvider
+from google import GoogleBooksProvider
Oops, something went wrong.

0 comments on commit 7e5ae8f

Please sign in to comment.