Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Newer
Older
100755 48 lines (38 sloc) 1.292 kb
f28b73a @mhagander Initial version of new planet code
authored
1 #!/usr/bin/env python
2 """PostgreSQL Planet Aggregator
3
4 This file contains the functions to suck down RSS/Atom feeds
5 (using feedparser), determine the actual blog URL (for the
6 HTML posts), and update the database with them.
7
8 Copyright (C) 2008 PostgreSQL Global Development Group
9 """
10
11 import psycopg2
12 import feedparser
13 import datetime
14 import socket
15
class Aggregator:
    """Fill in the blog URL of registered feeds that do not have one yet.

    For every row of planet.feeds with an empty blogurl, the feed is
    downloaded with feedparser and the feed-level link it advertises is
    written back to the blogurl column.
    """

    def __init__(self, db):
        """Keep the database connection and set the default socket timeout.

        db -- DB-API connection; only cursor() and commit() are used.
        """
        self.db = db
        self.stored = 0
        # Process-wide default for newly created sockets, so a slow feed
        # server should not be able to block the run indefinitely.
        socket.setdefaulttimeout(20)

    def Update(self):
        """Run discovery for every feed lacking a blog URL, then commit.

        Feeds whose URL contains 'planet' are excluded by the query.
        All updates are committed together once every feed was processed.
        """
        feeds = self.db.cursor()
        feeds.execute("SELECT id,feedurl,name,blogurl FROM planet.feeds WHERE blogurl='' AND feedurl NOT LIKE '%planet%'")
        for feed in feeds.fetchall():
            self.DiscoverFeed(feed)
        self.db.commit()

    def DiscoverFeed(self, feedinfo):
        """Fetch one feed and store its advertised link as the blog URL.

        feedinfo -- row of (id, feedurl, name, blogurl) as selected by
                    Update().

        Failures are reported on stdout and never raised, so one bad feed
        does not stop the others. Nothing is committed here.
        """
        feed = feedparser.parse(feedinfo[1])

        # feedparser sets 'status' only when an HTTP response was received;
        # a failed fetch (timeout, DNS error, ...) has no such attribute and
        # is reported here as status None instead of raising.
        status = getattr(feed, 'status', None)
        if status != 200:
            # not ok!
            print("Feed %s status %s" % (feedinfo[1], status))
            return

        # Semi-broken feeds may lack a feed-level link altogether; that (and
        # any other per-feed error) is reported and skipped. Only Exception
        # is caught so that Ctrl-C / SystemExit still stop the run.
        try:
            if feed.feed.link:
                print("Setting feed for %s to %s" % (feedinfo[2], feed.feed.link))
                c = self.db.cursor()
                # The link comes from a remote document: hand it to the
                # driver as a query parameter, never splice it into the SQL.
                c.execute("UPDATE planet.feeds SET blogurl=%s WHERE id=%s", (feed.feed.link, feedinfo[0]))
        except Exception:
            print("Exception when processing feed for %s" % (feedinfo[2]))
            print(feed)
f28b73a @mhagander Initial version of new planet code
authored
45
# Script entry point: connect to the planetpg database through the local
# socket directory in /tmp and run one discovery pass.
if __name__ == "__main__":
    connection = psycopg2.connect('dbname=planetpg host=/tmp/')
    Aggregator(connection).Update()
Something went wrong with that request. Please try again.