From fdb91411dc19f34860f567922ea9f315b711d7a9 Mon Sep 17 00:00:00 2001 From: Matt Domsch Date: Tue, 15 Jun 2010 11:21:41 -0500 Subject: [PATCH] add PubSubHubbub-Publisher 1.0 to planet/vendor/, use it to publish at the end of each run --- planet.py | 8 ++ planet/__init__.py | 2 + planet/publish.py | 15 ++++ planet/vendor/pubsubhubbub_publisher/PKG-INFO | 10 +++ .../vendor/pubsubhubbub_publisher/__init__.py | 2 + .../pubsubhubbub_publish.py | 77 +++++++++++++++++++ 6 files changed, 114 insertions(+) create mode 100644 planet/publish.py create mode 100644 planet/vendor/pubsubhubbub_publisher/PKG-INFO create mode 100644 planet/vendor/pubsubhubbub_publisher/__init__.py create mode 100644 planet/vendor/pubsubhubbub_publisher/pubsubhubbub_publish.py diff --git a/planet.py b/planet.py index 881f756..c08949d 100755 --- a/planet.py +++ b/planet.py @@ -23,6 +23,7 @@ only_if_new = 0 expunge = 0 debug_splice = 0 + no_publish = 0 for arg in sys.argv[1:]: if arg == "-h" or arg == "--help": @@ -34,6 +35,7 @@ print " -h, --help Display this help message and exit" print " -n, --only-if-new Only spider new feeds" print " -x, --expunge Expunge old entries from cache" + print " --no-publish Do not publish feeds using PubSubHubbub" print sys.exit(0) elif arg == "-v" or arg == "--verbose": @@ -46,6 +48,8 @@ expunge = 1 elif arg == "-d" or arg == "--debug-splice": debug_splice = 1 + elif arg == "--no-publish": + no_publish = 1 elif arg.startswith("-"): print >>sys.stderr, "Unknown option:", arg sys.exit(1) @@ -84,6 +88,10 @@ splice.apply(doc.toxml('utf-8')) + if not no_publish: + from planet import publish + publish.publish(config) + if expunge: from planet import expunge expunge.expungeCache diff --git a/planet/__init__.py b/planet/__init__.py index 3f4bb7f..61c2cb1 100644 --- a/planet/__init__.py +++ b/planet/__init__.py @@ -38,3 +38,5 @@ def getLogger(level, format): import feedparser feedparser.SANITIZE_HTML=1 feedparser.RESOLVE_RELATIVE_URIS=0 + +import publish diff --git 
a/planet/publish.py b/planet/publish.py
new file mode 100644
index 0000000..ce88cd3
--- /dev/null
+++ b/planet/publish.py
@@ -0,0 +1,15 @@
+import os, sys
+import urlparse
+import pubsubhubbub_publisher as PuSH
+
+def publish(config):
+    """Announce the .xml files found directly in config.output_dir() to the hub."""
+    hub, link = config.pubsubhubbub_hub(), config.link()
+    if not (hub and link):
+        return  # both a hub and the planet link are needed to build topic URLs
+    for _root, _dirs, names in os.walk(config.output_dir()):
+        try:
+            PuSH.publish(hub, [urlparse.urljoin(link, n) for n in names if n.endswith('.xml')])
+        except PuSH.PublishError, e:
+            sys.stderr.write("PubSubHubbub publishing error: %s\n" % e)
+        break  # stop after the first os.walk() entry, i.e. output_dir itself
diff --git a/planet/vendor/pubsubhubbub_publisher/PKG-INFO b/planet/vendor/pubsubhubbub_publisher/PKG-INFO
new file mode 100644
index 0000000..072227a
--- /dev/null
+++ b/planet/vendor/pubsubhubbub_publisher/PKG-INFO
@@ -0,0 +1,10 @@
+Metadata-Version: 1.0
+Name: PubSubHubbub_Publisher
+Version: 1.0
+Summary: Publisher client for PubSubHubbub
+Home-page: http://code.google.com/p/pubsubhubbub/
+Author: Brett Slatkin
+Author-email: bslatkin@gmail.com
+License: Apache 2.0
+Description: A simple, open, server-to-server web-hook-based pubsub (publish/subscribe) protocol as a simple extension to Atom. Parties (servers) speaking the PubSubHubbub protocol can get near-instant notifications (via webhook callbacks) when a topic (Atom URL) they're interested in is updated.
+Platform: UNKNOWN
diff --git a/planet/vendor/pubsubhubbub_publisher/__init__.py b/planet/vendor/pubsubhubbub_publisher/__init__.py
new file mode 100644
index 0000000..d9dbb68
--- /dev/null
+++ b/planet/vendor/pubsubhubbub_publisher/__init__.py
@@ -0,0 +1,2 @@
+from pubsubhubbub_publish import *
+
diff --git a/planet/vendor/pubsubhubbub_publisher/pubsubhubbub_publish.py b/planet/vendor/pubsubhubbub_publisher/pubsubhubbub_publish.py
new file mode 100644
index 0000000..9ae6e66
--- /dev/null
+++ b/planet/vendor/pubsubhubbub_publisher/pubsubhubbub_publish.py
@@ -0,0 +1,77 @@
+#!/usr/bin/env python
+#
+# Copyright 2009 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""Simple Publisher client for PubSubHubbub.
+
+Example usage:
+
+  from pubsubhubbub_publish import *
+  try:
+    publish('http://pubsubhubbub.appspot.com',
+            'http://example.com/feed1/atom.xml',
+            'http://example.com/feed2/atom.xml',
+            'http://example.com/feed3/atom.xml')
+  except PublishError, e:
+    # handle exception...
+
+Set the 'http_proxy' environment variable on *nix or Windows to use an
+HTTP proxy.
+"""
+
+__author__ = 'bslatkin@gmail.com (Brett Slatkin)'
+
+import urllib
+import urllib2
+
+
+class PublishError(Exception):
+  """An error occurred while trying to publish to the hub."""
+
+
+URL_BATCH_SIZE = 100
+
+
+def publish(hub, *urls):
+  """Publishes an event to a hub.
+
+  Args:
+    hub: The hub to publish the event to.
+    *urls: One or more URLs to publish to. If only a single URL argument is
+      passed and that item is an iterable that is not a string, the contents of
+      that iterable will be used to produce the list of published URLs. If
+      more than URL_BATCH_SIZE URLs are supplied, this function will batch them
+      into chunks across multiple requests.
+
+  Raises:
+    PublishError if anything went wrong during publishing.
+  """
+  if len(urls) == 1 and not isinstance(urls[0], basestring):
+    urls = list(urls[0])
+
+  for i in xrange(0, len(urls), URL_BATCH_SIZE):
+    chunk = urls[i:i+URL_BATCH_SIZE]
+    data = urllib.urlencode(
+        {'hub.url': chunk, 'hub.mode': 'publish'}, doseq=True)
+    try:
+      urllib2.urlopen(hub, data).close()  # reply body is unused; close it right away
+    except (IOError, urllib2.HTTPError), e:
+      if hasattr(e, 'code') and e.code == 204:  # a 204 reply is treated as success
+        continue
+      error = ''
+      if hasattr(e, 'read'):
+        error = e.read()
+      raise PublishError('%s, Response: "%s"' % (e, error))