Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

Initial version of new planet code

  • Loading branch information...
commit f28b73a2e01ea82431b56d76d5c93102671c2cbf 0 parents
@mhagander authored
70 aggregator.py
@@ -0,0 +1,70 @@
+#!/usr/bin/env python
+"""PostgreSQL Planet Aggregator
+
+This file contains the functions to suck down RSS/Atom feeds
+(using feedparser) and store the results in a PostgreSQL database.
+
+Copyright (C) 2008 PostgreSQL Global Development Group
+"""
+
+import psycopg2
+import feedparser
+import datetime
+import socket
+
+class Aggregator:
+ def __init__(self, db):
+ self.db = db
+ self.stored = 0
+ socket.setdefaulttimeout(20)
+
+ def Update(self):
+ feeds = self.db.cursor()
+ feeds.execute('SELECT id,feedurl,name,lastget FROM planet.feeds')
+ for feed in feeds.fetchall():
+ self.ParseFeed(feed)
+ self.db.commit()
+
+ def ParseFeed(self, feedinfo):
+ #print "Loading feed %s" % (feedinfo[1])
+ parsestart = datetime.datetime.now()
+ feed = feedparser.parse(feedinfo[1], modified=feedinfo[3].timetuple())
+
+ if feed.status == 304:
+ # not changed
+ return
+ if feed.status != 200:
+ # not ok!
+ print "Feed %s status %s" % (feedinfo[1], feed.status)
+ return
+
+ for entry in feed.entries:
+ if entry.has_key('summary'):
+ txt = entry.summary
+ else:
+ txt = entry.content[0].value
+ if entry.has_key('guidislink'):
+ guidisperma = entry.guidislink
+ else:
+ guidisperma = True
+ self.StoreEntry(feedinfo[0], entry.id, entry.date, entry.link, guidisperma, entry.title, txt)
+ self.db.cursor().execute('UPDATE planet.feeds SET lastget=%(lg)s WHERE id=%(feed)s', {'lg':parsestart, 'feed': feedinfo[0]})
+
+ def StoreEntry(self, feedid, guid, date, link, guidisperma, title, txt):
+ c = self.db.cursor()
+ c.execute("SELECT id FROM planet.posts WHERE feed=%(feed)s AND guid=%(guid)s", {'feed':feedid, 'guid':guid})
+ if c.rowcount > 0:
+ return
+ print "Store entry %s from feed %s" % (guid, feedid)
+ c.execute("INSERT INTO planet.posts (feed,guid,link,guidisperma,dat,title,txt) VALUES (%(feed)s,%(guid)s,%(link)s,%(guidisperma)s,%(date)s,%(title)s,%(txt)s)",
+ {'feed': feedid,
+ 'guid': guid,
+ 'link': link,
+ 'guidisperma': guidisperma,
+ 'date': date,
+ 'title': title,
+ 'txt': txt})
+ self.stored += 1
+
+if __name__=="__main__":
+ Aggregator(psycopg2.connect('dbname=planetpg host=/tmp/')).Update()
43 discovery.py
@@ -0,0 +1,43 @@
+#!/usr/bin/env python
+"""PostgreSQL Planet Aggregator
+
+This file contains the functions to suck down RSS/Atom feeds
+(using feedparser), determining the actual blog URL (for the
+HTML posts), and update the database with them.
+
+Copyright (C) 2008 PostgreSQL Global Development Group
+"""
+
+import psycopg2
+import feedparser
+import datetime
+import socket
+
+class Aggregator:
+ def __init__(self, db):
+ self.db = db
+ self.stored = 0
+ socket.setdefaulttimeout(20)
+
+ def Update(self):
+ feeds = self.db.cursor()
+ feeds.execute("SELECT id,feedurl,name,blogurl FROM planet.feeds WHERE blogurl='' AND feedurl NOT LIKE '%planet%'")
+ for feed in feeds.fetchall():
+ self.DiscoverFeed(feed)
+ self.db.commit()
+
+ def DiscoverFeed(self, feedinfo):
+ feed = feedparser.parse(feedinfo[1])
+
+ if feed.status != 200:
+ # not ok!
+ print "Feed %s status %s" % (feedinfo[1], feed.status)
+ return
+
+ if feed.feed.link:
+ print "Setting feed for %s to %s" % (feedinfo[2], feed.feed.link)
+ c = self.db.cursor()
+ c.execute("UPDATE planet.feeds SET blogurl='%s' WHERE id=%i" % (feed.feed.link, feedinfo[0]))
+
+if __name__=="__main__":
+ Aggregator(psycopg2.connect('dbname=planetpg host=/tmp/')).Update()
2,858 feedparser.py
2,858 additions, 0 deletions not shown
124 generator.py
@@ -0,0 +1,124 @@
+#!/usr/bin/env python
+"""PostgreSQL Planet Aggregator
+
+This file contains the functions to generate output RSS and
+HTML data from what's currently in the database.
+
+Copyright (C) 2008 PostgreSQL Global Development Group
+"""
+
+import psycopg2
+import PyRSS2Gen
+import datetime
+import sys
+from HTMLParser import HTMLParser
+from planethtml import PlanetHtml
+
+class Generator:
+ def __init__(self,db):
+ self.db = db
+
+ def Generate(self):
+ rss = PyRSS2Gen.RSS2(
+ title = 'Planet PostgreSQL',
+ link = 'http://planet.postgresql.org',
+ description = 'Planet PostgreSQL',
+ generator = 'Planet PostgreSQL',
+ lastBuildDate = datetime.datetime.utcnow())
+ html = PlanetHtml()
+
+ c = self.db.cursor()
+ c.execute("SET TIMEZONE=GMT")
+ c.execute("SELECT guid,link,dat,title,txt,name,blogurl,guidisperma FROM planet.posts INNER JOIN planet.feeds ON planet.feeds.id=planet.posts.feed ORDER BY dat DESC LIMIT 30")
+ for post in c.fetchall():
+ desc = self.TruncateAndCleanDescription(post[4], post[3])
+ rss.items.append(PyRSS2Gen.RSSItem(
+ title=post[5] + ': ' + post[3],
+ link=post[1],
+ guid=PyRSS2Gen.Guid(post[0],post[7]),
+ pubDate=post[2],
+ description=desc))
+ html.AddItem(post[0], post[1], post[2], post[3], post[5], post[6], desc)
+
+ c.execute("SELECT name,blogurl,feedurl FROM planet.feeds ORDER BY name")
+ for feed in c.fetchall():
+ html.AddFeed(feed[0], feed[1], feed[2])
+
+ rss.write_xml(open("www/rss20.xml","w"), encoding='utf-8')
+ html.WriteFile("www/index.html")
+
+ def TruncateAndCleanDescription(self, txt, title):
+ ht = HtmlTruncator(1024, title)
+ ht.feed(txt)
+ out = ht.GetText()
+
+ # Remove initial <br /> tags
+ while out.startswith('<br'):
+ out = out[out.find('>')+1:]
+
+ return out
+
+class HtmlTruncator(HTMLParser):
+ def __init__(self, maxlen, title = None):
+ HTMLParser.__init__(self)
+ self.len = 0
+ self.maxlen = maxlen
+ self.fulltxt = ''
+ self.trunctxt = ''
+ self.tagstack = []
+ self.skiprest = False
+ self.title = title
+
+ def feed(self, txt):
+ txt = txt.lstrip()
+ self.fulltxt += txt
+ HTMLParser.feed(self, txt)
+
+ def handle_startendtag(self, tag, attrs):
+ if self.skiprest: return
+ self.trunctxt += self.get_starttag_text()
+
+ def handle_starttag(self, tag, attrs):
+ if self.skiprest: return
+ self.trunctxt += "<" + tag
+ self.trunctxt += (' '.join([(' %s="%s"' % (k,v)) for k,v in attrs]))
+ self.trunctxt += ">"
+ self.tagstack.append(tag)
+
+ def handle_endtag(self, tag):
+ if self.skiprest: return
+ self.trunctxt += "</" + tag + ">"
+ self.tagstack.pop()
+
+ def handle_entityref(self, ref):
+ self.len += 1
+ if self.skiprest: return
+ self.trunctxt += "&" + ref + ";"
+
+ def handle_data(self, data):
+ self.len += len(data)
+ if self.skiprest: return
+ self.trunctxt += data
+ if self.len > self.maxlen:
+ # Passed max length, so truncate text as close to the limit as possible
+ self.trunctxt = self.trunctxt[0:len(self.trunctxt)-(self.len-self.maxlen)]
+ # Terminate at whitespace if possible, max 12 chars back
+ for i in range(len(self.trunctxt)-1, len(self.trunctxt)-12, -1):
+ if self.trunctxt[i].isspace():
+ self.trunctxt = self.trunctxt[0:i] + " [...]"
+ break
+
+ # Now append any tags that weren't properly closed
+ self.tagstack.reverse()
+ for tag in self.tagstack:
+ self.trunctxt += "</" + tag + ">"
+ self.skiprest = True
+
+ def GetText(self):
+ if self.len > self.maxlen:
+ return self.trunctxt
+ else:
+ return self.fulltxt
+
+if __name__=="__main__":
+ Generator(psycopg2.connect('dbname=planetpg host=/tmp')).Generate()
9 planet_run.sh
@@ -0,0 +1,9 @@
+#!/bin/sh
+
+PATH=$PATH:/usr/local/bin
+
+cd /home/planetpg/planet
+date >> planet.log
+python aggregator.py >> planet.log 2>&1
+python generator.py >>planet.log 2>&1
+echo Done `date` >> planet.log
107 planethtml.py
@@ -0,0 +1,107 @@
+#!/usr/bin/env python
+"""PostgreSQL Planet Aggregator
+
+This file contains the functions to generate HTML format output.
+It's a fairly ugly hack compared to using a real template
+system, but...
+
+Copyright (C) 2008 PostgreSQL Global Development Group
+"""
+
+import datetime
+
+class PlanetHtml:
+ def __init__(self):
+ self.items = []
+ self.feeds = []
+ self.str = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en" dir="ltr">
+ <head>
+ <title>Planet PostgreSQL</title>
+ <meta http-equiv="Content-Type" content="text/xhtml; charset=utf-8" />
+ <link rel="shortcut icon" href="/favicon.ico" />
+ <link rel="alternate" type="application/rss+xml" title="Planet PostgreSQL" href="http://planet.postgresql.org/rss20.xml" />
+ <style type="text/css" media="screen" title="Normal Text">@import url("css/planet.css");</style>
+ </head>
+ <body>
+ <div align="center">
+ <div id="planetHeader">
+ <div class="fl"><img src="http://www.postgresql.org/layout/images/hdr_left.png" border="0" alt="PostgreSQL" /></div>
+ <div class="fr"><img width="210" height="80" src="http://www.postgresql.org/layout/images/hdr_right.png" alt="The world's most advanced open source database" /></div>
+ <div class="cb"></div>
+ </div>
+ <div id="planetMain">
+"""
+
+ def AddItem(self,guid,link,dat,title,author,blogurl,txt):
+ self.items.append((guid,link,dat,title,author,blogurl,txt))
+
+ def AddFeed(self,name,blogurl,feedurl):
+ self.feeds.append((name,blogurl,feedurl))
+
+ def BuildPosts(self):
+ self.str += """ <div id="planetLeft">"""
+ lastdate = None
+ for post in self.items:
+ if post[6].endswith('[...]'):
+ txt = post[6][:len(post[6])-4] + """<a href="%s">continue reading...</a>]""" % (post[1])
+ else:
+ txt = post[6]
+
+ if lastdate == None or lastdate != post[2].date():
+ self.str += """
+ <div class="planetNewDate">%s</div>""" % (post[2].date())
+ lastdate = post[2].date()
+
+ if post[5]:
+ posterstr = """<a class="author" href="%s">%s</a>""" % (post[5], post[4])
+ else:
+ posterstr = post[4]
+
+ self.str += """
+ <div class="planetPost">
+ <div class="planetPostTitle"><a href="%s">%s</a></div>
+ <div class="planetPostAuthor">
+ <div class="ppa_top">&nbsp;</div>
+ <p>Posted by %s on <span class="date">%s at %s</span></p>
+ <div class="ppa_bottom">&nbsp;</div>
+ </div>
+ <div class="planetPostContent">%s</div>
+ <div class="cb"></div>
+ </div>""" % (post[1], post[3], posterstr, post[2].date(), post[2].time(), txt)
+
+ self.str += """ </div>"""
+
+ def BuildRight(self):
+ self.str += """ <div id="planetRight">
+<div class="planetRightTitle">Subscriptions</div>
+<ul>"""
+ for feed in self.feeds:
+ self.str += "<li>"
+ if feed[1] != '':
+ self.str += """<a href="%s">%s</a>""" % (feed[1], feed[0])
+ else:
+ self.str += feed[0]
+ self.str += """
+<a href="%s"><img border="0" src="http://www.postgresql.org/layout/images/ico_rss.png" /></a></li>""" % (feed[2])
+ self.str += """ </ul>
+ <div class="planetRightTitle">Feeds</div>
+ <ul>
+ <li><a href="rss20.xml">Planet PostgreSQL</a> <a href="rss20.xml"><img border="0" src="http://www.postgresql.org/layout/images/ico_rss.png"></a></li>
+ </ul>
+ </div>
+"""
+ def WriteFile(self,filename):
+ self.BuildPosts()
+ self.BuildRight()
+ self.str += """
+ </div>
+ </div>
+ </div>
+</body>
+</html>
+"""
+ f = open(filename,"w")
+ f.write(self.str)
+ f.close()
132 www/css/planet.css
@@ -0,0 +1,132 @@
+/* Page-wide defaults. */
+body {
+    margin: 0;
+    padding: 0;
+    font-family: verdana, sans-serif;
+    font-size: 13px;
+    color: #000000;
+    background-color: #ffffff;
+}
+
+/* Banner across the top of the page. */
+div#planetHeader {
+    width: 800px;
+    height: 80px;
+    margin: 5px 0 2px 0;
+    padding: 0;
+    background: url(http://www.postgresql.org/layout/images/hdr_fill.png);
+}
+
+/* Container for both columns. */
+div#planetMain {
+    border: none;
+    width: 800px;
+}
+
+/* Left column: the posts. */
+div#planetLeft {
+    float: left;
+    width: 590px;
+    padding-top: 10px;
+}
+
+/* Per-day heading; currently hidden. */
+div.planetNewDate {
+    display: none;
+    margin-left: 40px;
+    font-size: 20px;
+    color: #ec5800;
+    text-align: left;
+}
+
+/* One post, separated from the next by a thin rule. */
+div.planetPost {
+    margin-bottom: 15px;
+    padding: 5px 5px 15px;
+    border-bottom: thin solid #dadada;
+    text-align: left;
+}
+
+/* Post headline. */
+div.planetPostTitle {
+    font-size: 20px;
+    /* Boldness is set with font-weight; "bold" is not a font-style value,
+       so the previous declaration was ignored. */
+    font-weight: bold;
+    font-family: verdana, helvetica, arial, sans-serif;
+    padding-bottom: 15px;
+}
+
+/* Headline links are underlined on hover only. */
+div.planetPostTitle a {
+    text-decoration: none;
+}
+
+div.planetPostTitle a:hover {
+    text-decoration: underline;
+}
+
+/* Image strips above and below the author box. */
+div.ppa_top {
+    height: 27px;
+    margin: 0 10px 0 -10px;
+    background: url(/img/tleft.png) no-repeat top left;
+}
+
+div.ppa_bottom {
+    height: 27px;
+    margin: 0 10px 0 -10px;
+    background: url(/img/bleft.png) no-repeat top left;
+}
+
+/* Grey author/date box floated to the left of the post text. */
+div.planetPostAuthor {
+    /* disabled: color: #ec5800; */
+    float: left;
+    overflow: hidden;
+    width: 57px;
+    margin: 0;
+    padding: 0 10px;
+    font-size: .8em;
+    text-align: center;
+    color: #909090;
+    background-color: #dadada;
+}
+
+div.planetPostAuthor a.author {
+    font-style: italic;
+}
+
+div.planetPostAuthor span.date {
+    font-style: italic;
+}
+
+/* Post text, indented to clear the author box. */
+div.planetPostContent {
+    /* disabled: float: right; width: 480px; */
+    margin-left: 85px;
+    text-align: left;
+}
+
+/* Images inside posts: centred and capped at 100x100. */
+div.planetPostContent img {
+    display: block;
+    max-width: 100px;
+    max-height: 100px;
+    margin-left: auto;
+    margin-right: auto;
+}
+
+/* Right column: subscription and feed lists. */
+div#planetRight {
+    float: right;
+    width: 190px;
+    margin-top: 10px;
+    text-align: left;
+}
+
+div#planetRight div.planetRightTitle {
+    font-size: 20px;
+    font-weight: bold;
+}
+
+div#planetRight ul {
+    padding-left: 5px;
+    list-style: none;
+}
+
+/* Float and clear helpers. */
+div.fl {
+    float: left;
+    border: none;
+    text-align: left;
+}
+div.fr {
+    float: right;
+}
+div.cb {
+    clear: both;
+}
+
+/* Link colours; the order of these four rules matters to the cascade. */
+a:link {
+    color:#0085B0;
+    text-decoration: underline;
+}
+a:visited {
+    color:#004E66;
+    text-decoration: underline;
+}
+a:active {
+    color:#0085B0;
+    text-decoration: underline;
+}
+a:hover {
+    color:#000000;
+    text-decoration: underline;
+}
+
BIN  www/img/bleft.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
BIN  www/img/tleft.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

0 comments on commit f28b73a

Please sign in to comment.
Something went wrong with that request. Please try again.