
Initial version of new planet code

Root commit f28b73a2e01ea82431b56d76d5c93102671c2cbf, committed by @mhagander on Oct 18, 2008
Showing with 3,343 additions and 0 deletions.
  1. +70 −0 aggregator.py
  2. +43 −0 discovery.py
  3. +2,858 −0 feedparser.py
  4. +124 −0 generator.py
  5. +9 −0 planet_run.sh
  6. +107 −0 planethtml.py
  7. +132 −0 www/css/planet.css
  8. BIN www/img/bleft.png
  9. BIN www/img/tleft.png
aggregator.py
@@ -0,0 +1,70 @@
+#!/usr/bin/env python
+"""PostgreSQL Planet Aggregator
+
+This file contains the functions to suck down RSS/Atom feeds
+(using feedparser) and store the results in a PostgreSQL database.
+
+Copyright (C) 2008 PostgreSQL Global Development Group
+"""
+
+import psycopg2
+import feedparser
+import datetime
+import socket
+
+class Aggregator:
+    def __init__(self, db):
+        self.db = db
+        self.stored = 0
+        socket.setdefaulttimeout(20)
+
+    def Update(self):
+        feeds = self.db.cursor()
+        feeds.execute('SELECT id,feedurl,name,lastget FROM planet.feeds')
+        for feed in feeds.fetchall():
+            self.ParseFeed(feed)
+        self.db.commit()
+
+    def ParseFeed(self, feedinfo):
+        #print "Loading feed %s" % (feedinfo[1])
+        parsestart = datetime.datetime.now()
+        # note: assumes lastget is never NULL; timetuple() would fail on None
+        feed = feedparser.parse(feedinfo[1], modified=feedinfo[3].timetuple())
+
+        # a fetch that fails at the network level leaves no HTTP status
+        # on the result, so check for it before reading feed.status
+        if not hasattr(feed, 'status'):
+            print "Feed %s could not be fetched" % (feedinfo[1])
+            return
+        if feed.status == 304:
+            # not changed
+            return
+        if feed.status != 200:
+            # not ok!
+            print "Feed %s status %s" % (feedinfo[1], feed.status)
+            return
+
+        for entry in feed.entries:
+            if 'summary' in entry:
+                txt = entry.summary
+            else:
+                txt = entry.content[0].value
+            if 'guidislink' in entry:
+                guidisperma = entry.guidislink
+            else:
+                guidisperma = True
+            self.StoreEntry(feedinfo[0], entry.id, entry.date, entry.link, guidisperma, entry.title, txt)
+        self.db.cursor().execute('UPDATE planet.feeds SET lastget=%(lg)s WHERE id=%(feed)s', {'lg': parsestart, 'feed': feedinfo[0]})
+
+    def StoreEntry(self, feedid, guid, date, link, guidisperma, title, txt):
+        c = self.db.cursor()
+        c.execute("SELECT id FROM planet.posts WHERE feed=%(feed)s AND guid=%(guid)s", {'feed': feedid, 'guid': guid})
+        if c.rowcount > 0:
+            # already stored; skip duplicates
+            return
+        print "Store entry %s from feed %s" % (guid, feedid)
+        c.execute("INSERT INTO planet.posts (feed,guid,link,guidisperma,dat,title,txt) VALUES (%(feed)s,%(guid)s,%(link)s,%(guidisperma)s,%(date)s,%(title)s,%(txt)s)",
+                  {'feed': feedid,
+                   'guid': guid,
+                   'link': link,
+                   'guidisperma': guidisperma,
+                   'date': date,
+                   'title': title,
+                   'txt': txt})
+        self.stored += 1
+
+if __name__ == "__main__":
+    Aggregator(psycopg2.connect('dbname=planetpg host=/tmp/')).Update()
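
The SQL in aggregator.py implies a planet schema roughly like the sketch below. No DDL appears in this commit, so the column types, defaults, and constraints here are assumptions inferred from how the queries use each column:

    CREATE SCHEMA planet;

    CREATE TABLE planet.feeds (
        id      serial PRIMARY KEY,        -- assumed type
        feedurl text NOT NULL,
        name    text NOT NULL,
        blogurl text NOT NULL DEFAULT '',  -- filled in by discovery.py below
        lastget timestamptz NOT NULL       -- fed to feedparser as the "modified" timestamp
    );

    CREATE TABLE planet.posts (
        id          serial PRIMARY KEY,    -- assumed type
        feed        integer NOT NULL REFERENCES planet.feeds (id),
        guid        text NOT NULL,         -- StoreEntry dedups on (feed, guid)
        link        text NOT NULL,
        guidisperma boolean NOT NULL,
        dat         timestamptz NOT NULL,  -- the entry's date
        title       text NOT NULL,
        txt         text NOT NULL          -- summary, or first content block
    );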
discovery.py
@@ -0,0 +1,43 @@
+#!/usr/bin/env python
+"""PostgreSQL Planet Aggregator
+
+This file contains the functions to suck down RSS/Atom feeds
+(using feedparser), determine the actual blog URL (for the
+HTML posts), and update the database accordingly.
+
+Copyright (C) 2008 PostgreSQL Global Development Group
+"""
+
+import psycopg2
+import feedparser
+import datetime
+import socket
+
+class Aggregator:
+    def __init__(self, db):
+        self.db = db
+        self.stored = 0
+        socket.setdefaulttimeout(20)
+
+    def Update(self):
+        feeds = self.db.cursor()
+        feeds.execute("SELECT id,feedurl,name,blogurl FROM planet.feeds WHERE blogurl='' AND feedurl NOT LIKE '%planet%'")
+        for feed in feeds.fetchall():
+            self.DiscoverFeed(feed)
+        self.db.commit()
+
+    def DiscoverFeed(self, feedinfo):
+        feed = feedparser.parse(feedinfo[1])
+
+        # bail out early if the fetch failed at the network level
+        if not hasattr(feed, 'status'):
+            print "Feed %s could not be fetched" % (feedinfo[1])
+            return
+        if feed.status != 200:
+            # not ok!
+            print "Feed %s status %s" % (feedinfo[1], feed.status)
+            return
+
+        if feed.feed.get('link'):
+            print "Setting feed for %s to %s" % (feedinfo[2], feed.feed.link)
+            c = self.db.cursor()
+            # bind parameters instead of interpolating into the SQL string,
+            # so a link containing a quote cannot break or inject into the query
+            c.execute("UPDATE planet.feeds SET blogurl=%(url)s WHERE id=%(id)s",
+                      {'url': feed.feed.link, 'id': feedinfo[0]})
+
+if __name__ == "__main__":
+    Aggregator(psycopg2.connect('dbname=planetpg host=/tmp/')).Update()
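
Both scripts double as importable modules; a minimal driver in the same spirit (hypothetical: the real entry points are the __main__ blocks above and planet_run.sh, whose contents are not shown in this diff) could chain the two passes on one connection:

    #!/usr/bin/env python
    # Hypothetical driver, a sketch only. Note that discovery.py also
    # names its class Aggregator, so the module prefix disambiguates.
    import psycopg2

    import aggregator
    import discovery

    db = psycopg2.connect('dbname=planetpg host=/tmp/')
    aggregator.Aggregator(db).Update()  # fetch feeds, store new posts
    discovery.Aggregator(db).Update()   # fill in missing blogurl values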