Skip to content
Newer
Older
100755 71 lines (60 sloc) 2.08 KB
f28b73a @mhagander Initial version of new planet code
authored Oct 18, 2008
1 #!/usr/bin/env python
2 """PostgreSQL Planet Aggregator
3
4 This file contains the functions to suck down RSS/Atom feeds
5 (using feedparser) and store the results in a PostgreSQL database.
6
7 Copyright (C) 2008 PostgreSQL Global Development Group
8 """
9
10 import psycopg2
11 import feedparser
12 import datetime
13 import socket
14
class Aggregator:
    """Download the feeds listed in planet.feeds and store unseen posts.

    Attributes:
        db: DB-API connection; only cursor() and commit() are used here.
        stored: number of rows inserted into planet.posts by this instance.
    """

    def __init__(self, db):
        """Keep the connection and install a 20 second default socket timeout.

        Note that socket.setdefaulttimeout() is process-wide: it affects every
        socket created afterwards, not only the ones used for feed downloads.
        """
        self.db = db
        self.stored = 0
        socket.setdefaulttimeout(20)

    def Update(self):
        """Run ParseFeed over every row of planet.feeds, then commit once."""
        feeds = self.db.cursor()
        feeds.execute('SELECT id,feedurl,name,lastget FROM planet.feeds')
        for feed in feeds.fetchall():
            self.ParseFeed(feed)
        # Single commit for the whole run: nothing is persisted if a feed
        # raises part-way through.
        self.db.commit()

    def ParseFeed(self, feedinfo):
        """Fetch one feed and store its entries.

        feedinfo is a row shaped like the SELECT in Update():
        (id, feedurl, name, lastget). lastget may be None, in which case the
        feed is fetched unconditionally.

        Returns None. The feed's lastget is only advanced when the server
        answered 200; on 304, on any other status, or when no HTTP response
        was obtained at all, the row is left untouched so the next run retries.
        """
        feedid = feedinfo[0]
        feedurl = feedinfo[1]
        lastget = feedinfo[3]

        #print "Loading feed %s" % (feedinfo[1])
        # Taken before the download so that posts published while we are
        # fetching are not hidden by the next conditional request.
        parsestart = datetime.datetime.now()
        if lastget is None:
            # Never fetched before (NULL lastget): nothing to be conditional on.
            feed = feedparser.parse(feedurl)
        else:
            feed = feedparser.parse(feedurl, modified=lastget.timetuple())

        # 'status' is absent when no HTTP response was received (timeout,
        # DNS failure, refused connection), so it must not be read blindly.
        status = getattr(feed, 'status', None)
        if status is None:
            print("Feed %s could not be fetched: %s"
                  % (feedurl, getattr(feed, 'bozo_exception', None)))
            return
        if status == 304:
            # not changed
            return
        if status != 200:
            # not ok!
            print("Feed %s status %s" % (feedurl, status))
            return

        for entry in feed.entries:
            try:
                if 'summary' in entry:
                    txt = entry.summary
                else:
                    txt = entry.content[0].value
                guid = entry.id
                date = entry.date
                link = entry.link
                title = entry.title
            except (AttributeError, KeyError, IndexError) as e:
                # One malformed entry must not abort the run for all feeds.
                print("Feed %s: skipping entry with missing field: %s"
                      % (feedurl, e))
                continue
            # Without an explicit flag the guid is treated as the permalink.
            guidisperma = entry.get('guidislink', True)
            self.StoreEntry(feedid, guid, date, link, guidisperma, title, txt)

        self.db.cursor().execute(
            'UPDATE planet.feeds SET lastget=%(lg)s WHERE id=%(feed)s',
            {'lg': parsestart, 'feed': feedid})

    def StoreEntry(self, feedid, guid, date, link, guidisperma, title, txt):
        """Insert one post into planet.posts unless (feed, guid) already exists.

        Increments self.stored for every row actually inserted. Does not
        commit; the caller owns the transaction.
        """
        c = self.db.cursor()
        c.execute("SELECT id FROM planet.posts WHERE feed=%(feed)s AND guid=%(guid)s",
                  {'feed': feedid, 'guid': guid})
        if c.rowcount > 0:
            # Already stored on an earlier run.
            return
        print("Store entry %s from feed %s" % (guid, feedid))
        c.execute("INSERT INTO planet.posts (feed,guid,link,guidisperma,dat,title,txt) VALUES (%(feed)s,%(guid)s,%(link)s,%(guidisperma)s,%(date)s,%(title)s,%(txt)s)",
                  {'feed': feedid,
                   'guid': guid,
                   'link': link,
                   'guidisperma': guidisperma,
                   'date': date,
                   'title': title,
                   'txt': txt})
        self.stored += 1
68
if __name__ == "__main__":
    # Script entry point: open the planetpg database and run a single
    # aggregation pass over all registered feeds.
    connection = psycopg2.connect('dbname=planetpg host=/tmp/')
    aggregator = Aggregator(connection)
    aggregator.Update()
Something went wrong with that request. Please try again.