Permalink
Browse files

Make the aggregator write its log to the database instead of just

a flatfile.
Add ability to view this data on a per-blog basis in /register/.
Set up a cronjob to mail summary logs to planet@postgresql.org.
  • Loading branch information...
1 parent a4af9ca commit cf93080d4a83099cbfdad209e57324d6bda37da6 @mhagander committed Jan 31, 2009
View
@@ -1,10 +1,11 @@
#!/usr/bin/env python
+# vim: ai ts=4 sts=4 sw=4
"""PostgreSQL Planet Aggregator
This file contains the functions to suck down RSS/Atom feeds
(using feedparser) and store the results in a PostgreSQL database.
-Copyright (C) 2008 PostgreSQL Global Development Group
+Copyright (C) 2008-2009 PostgreSQL Global Development Group
"""
import psycopg2
@@ -25,24 +26,33 @@ def Update(self):
feeds.execute('SELECT id,feedurl,name,lastget,authorfilter FROM planet.feeds')
for feed in feeds.fetchall():
try:
- self.ParseFeed(feed)
+ n = self.ParseFeed(feed)
+ if n > 0:
+ c = self.db.cursor()
+ c.execute("INSERT INTO planet.aggregatorlog (feed, success, info) VALUES (%(feed)s, 't', %(info)s)", {
+ 'feed': feed[0],
+ 'info': 'Fetched %s posts.' % n,
+ })
except Exception, e:
print "Exception when parsing feed '%s': %s" % (feed[1], e)
self.db.rollback()
+ c = self.db.cursor()
+ c.execute("INSERT INTO planet.aggregatorlog (feed, success, info) VALUES (%(feed)s, 'f', %(info)s)", {
+ 'feed': feed[0],
+ 'info': 'Error: "%s"' % e,
+ })
self.db.commit()
def ParseFeed(self, feedinfo):
- #print "Loading feed %s" % (feedinfo[1])
+ numadded = 0
parsestart = datetime.datetime.now()
feed = feedparser.parse(feedinfo[1], modified=feedinfo[3].timetuple())
if feed.status == 304:
# not changed
- return
+ return 0
if feed.status != 200:
- # not ok!
- print "Feed %s status %s" % (feedinfo[1], feed.status)
- return
+ raise Exception('Feed returned status %s' % feed.status)
self.authorfilter = feedinfo[4]
@@ -61,16 +71,19 @@ def ParseFeed(self, feedinfo):
if txt == '' and entry.has_key('summary'):
txt = entry.summary
if txt == '':
+ # Not a critical error, we just ignore empty posts
print "Failed to get text for entry at %s" % entry.link
continue
if entry.has_key('guidislink'):
guidisperma = entry.guidislink
else:
guidisperma = True
- self.StoreEntry(feedinfo[0], entry.id, entry.date, entry.link, guidisperma, entry.title, txt)
- self.db.cursor().execute("UPDATE planet.feeds SET lastget=COALESCE((SELECT max(dat) FROM planet.posts WHERE planet.posts.feed=planet.feeds.id),'2000-01-01') WHERE planet.feeds.id=%(feed)s", {'feed': feedinfo[0]})
- #self.db.cursor().execute('UPDATE planet.feeds SET lastget=%(lg)s WHERE id=%(feed)s', {'lg':parsestart, 'feed': feedinfo[0]})
+ if self.StoreEntry(feedinfo[0], entry.id, entry.date, entry.link, guidisperma, entry.title, txt) > 0:
+ numadded += 1
+ if numadded > 0:
+ self.db.cursor().execute("UPDATE planet.feeds SET lastget=COALESCE((SELECT max(dat) FROM planet.posts WHERE planet.posts.feed=planet.feeds.id),'2000-01-01') WHERE planet.feeds.id=%(feed)s", {'feed': feedinfo[0]})
+ return numadded
def matches_filter(self, entry):
# For now, we only match against self.authorfilter. In the future,
@@ -90,7 +103,7 @@ def StoreEntry(self, feedid, guid, date, link, guidisperma, title, txt):
c = self.db.cursor()
c.execute("SELECT id FROM planet.posts WHERE feed=%(feed)s AND guid=%(guid)s", {'feed':feedid, 'guid':guid})
if c.rowcount > 0:
- return
+ return 0
print "Store entry %s from feed %s" % (guid, feedid)
c.execute("INSERT INTO planet.posts (feed,guid,link,guidisperma,dat,title,txt) VALUES (%(feed)s,%(guid)s,%(link)s,%(guidisperma)s,%(date)s,%(title)s,%(txt)s)",
{'feed': feedid,
@@ -101,6 +114,7 @@ def StoreEntry(self, feedid, guid, date, link, guidisperma, title, txt):
'title': title,
'txt': txt})
self.stored += 1
+ return 1
if __name__=="__main__":
c = ConfigParser.ConfigParser()
View
@@ -0,0 +1,64 @@
+#!/usr/bin/env python
+# vim: ai ts=4 sts=4 sw=4
+"""PostgreSQL Planet Aggregator
+
+This file contains the functions to email a report of failed fetches
+by reading the aggregator log table in the database.
+
+Current version just sends a single summary report. A future enhancement
+could be to send reports directly to individual blog owners.
+
+Copyright (C) 2009 PostgreSQL Global Development Group
+"""
+
+import psycopg2
+import smtplib
+import email.Message
+import ConfigParser
+
+class LogChecker(object):
+ def __init__(self, cfg, db):
+ self.cfg = cfg
+ self.db = db
+
+ def Check(self):
+ c = self.db.cursor()
+ c.execute("""SELECT ts,name,info FROM planet.aggregatorlog
+ INNER JOIN planet.feeds ON feed=feeds.id
+ WHERE success='f' AND ts > CURRENT_TIMESTAMP-'24 hours'::interval
+ ORDER BY name,ts""")
+ if c.rowcount > 0:
+ s = """
+One or more of the blogs fetched in the past 24 hours caused an error
+as listed below.
+
+"""
+ last = ""
+ for r in c:
+ if not last == r[1]:
+ last = r[1]
+ s += "\n"
+ s += "%s %-20s %s\n" % (r[0].strftime("%Y%m%d %H:%M:%S"), r[1][:20], r[2])
+
+ s += "\n\n"
+
+ toAddr = self.cfg.get('notify','mailto')
+ fromAddr = self.cfg.get('notify','mailfrom')
+
+ msg = email.Message.Message()
+ msg['To'] = toAddr
+ msg['From'] = fromAddr
+ msg['Subject'] = 'Planet PostgreSQL error summary'
+ msg.set_payload(s)
+
+
+ smtp = smtplib.SMTP('127.0.0.1')
+ smtp.sendmail(fromAddr, toAddr, msg.as_string())
+ smtp.quit()
+
+
+if __name__=="__main__":
+ c = ConfigParser.ConfigParser()
+ c.read('planet.ini')
+ LogChecker(c, psycopg2.connect(c.get('planet','db'))).Check()
+
View
@@ -8,3 +8,6 @@ server=localhost
listname=planet-subscribers
password=yeahthatssecret
+[notify]
+mailfrom=webmaster@postgresql.org
+mailto=planet@postgresql.org
@@ -63,3 +63,14 @@ def __str__(self):
class Meta:
db_table = 'planetadmin\".\"auditlog'
ordering = ['logtime']
+
+class AggregatorLog(models.Model):
+ ts = models.DateTimeField()
+ feed = models.ForeignKey(Blog, db_column='feed')
+ success = models.BooleanField()
+ info = models.TextField()
+
+ class Meta:
+ db_table = 'planet\".\"aggregatorlog'
+ ordering = ['-ts']
+
@@ -0,0 +1,26 @@
+{% extends "regbase.html" %}
+{%block regcontent %}
+<p>
+This is a log of the most recent activity on your feed. Note that only
+operations that had some effect are logged. The normal fetches that are
+made every 15 minutes don't log anything unless they found new posts or
+some error occurred.
+</p>
+<p>
+Return to <a href="../..">blog list</a>.
+</p>
+<table border="1" cellspacing="0" cellpadding="1">
+<tr>
+ <th>Time</th>
+ <th>Status</th>
+ <th>Info</th>
+</tr>
+{% for entry in entries %}
+<tr valign="top">
+ <td>{{entry.ts}}</td>
+ <td>{{entry.success|yesno:"Success,Failure"}}</td>
+ <td>{{entry.info}}</td>
+</tr>
+{%endfor%}
+</table>
+{% endblock %}
@@ -46,7 +46,8 @@
{%endif%}
</td>
- <td>{% if blog.approved or user.is_superuser%}
+ <td><a href="log/{{blog.id}}/">View log</a><br/>
+{% if blog.approved or user.is_superuser%}
<a href="blogposts/{{blog.id}}/">Posts</a><br/>
{%else%}
Not approved yet.<br/>
@@ -17,6 +17,7 @@
(r'^modify/(\d+)/$', 'planetadmin.register.views.modify'),
(r'^modifyauthorfilter/(\d+)/$', 'planetadmin.register.views.modifyauthorfilter'),
+ (r'^log/(\d+)/$','planetadmin.register.views.logview'),
(r'^blogposts/(\d+)/$', 'planetadmin.register.views.blogposts'),
(r'^blogposts/(\d+)/hide/(\d+)/$', 'planetadmin.register.views.blogpost_hide'),
(r'^blogposts/(\d+)/unhide/(\d+)/$', 'planetadmin.register.views.blogpost_unhide'),
@@ -212,6 +212,18 @@ def detach(request, id):
return HttpResponseRedirect('../..')
@login_required
+def logview(request, id):
+ blog = get_object_or_404(Blog, id=id)
+ if not blog.userid == request.user.username and not request.user.is_superuser:
+ return HttpResponse("You can't view the log for somebody elses blog!")
+
+ logentries = AggregatorLog.objects.filter(feed=blog)[:50]
+
+ return render_to_response('aggregatorlog.html', {
+ 'entries': logentries,
+ }, context_instance=RequestContext(request))
+
+@login_required
@transaction.commit_on_success
def blogposts(request, id):
blog = get_object_or_404(Blog, id=id)
View
@@ -22,6 +22,19 @@ SET default_tablespace = '';
SET default_with_oids = false;
--
+-- Name: aggregatorlog; Type: TABLE; Schema: planet; Owner: -; Tablespace:
+--
+
+CREATE TABLE aggregatorlog (
+ id integer NOT NULL,
+ ts timestamp with time zone DEFAULT now() NOT NULL,
+ feed integer NOT NULL,
+ success boolean NOT NULL,
+ info text NOT NULL
+);
+
+
+--
-- Name: feeds; Type: TABLE; Schema: planet; Owner: -; Tablespace:
--
@@ -55,6 +68,24 @@ CREATE TABLE posts (
--
+-- Name: aggregatorlog_id_seq; Type: SEQUENCE; Schema: planet; Owner: -
+--
+
+CREATE SEQUENCE aggregatorlog_id_seq
+ INCREMENT BY 1
+ NO MAXVALUE
+ NO MINVALUE
+ CACHE 1;
+
+
+--
+-- Name: aggregatorlog_id_seq; Type: SEQUENCE OWNED BY; Schema: planet; Owner: -
+--
+
+ALTER SEQUENCE aggregatorlog_id_seq OWNED BY aggregatorlog.id;
+
+
+--
-- Name: feeds_id_seq; Type: SEQUENCE; Schema: planet; Owner: -
--
@@ -94,6 +125,13 @@ ALTER SEQUENCE posts_id_seq OWNED BY posts.id;
-- Name: id; Type: DEFAULT; Schema: planet; Owner: -
--
+ALTER TABLE aggregatorlog ALTER COLUMN id SET DEFAULT nextval('aggregatorlog_id_seq'::regclass);
+
+
+--
+-- Name: id; Type: DEFAULT; Schema: planet; Owner: -
+--
+
ALTER TABLE feeds ALTER COLUMN id SET DEFAULT nextval('feeds_id_seq'::regclass);
@@ -105,6 +143,14 @@ ALTER TABLE posts ALTER COLUMN id SET DEFAULT nextval('posts_id_seq'::regclass);
--
+-- Name: aggregatorlog_pkey; Type: CONSTRAINT; Schema: planet; Owner: -; Tablespace:
+--
+
+ALTER TABLE ONLY aggregatorlog
+ ADD CONSTRAINT aggregatorlog_pkey PRIMARY KEY (id);
+
+
+--
-- Name: feeds_pkey; Type: CONSTRAINT; Schema: planet; Owner: -; Tablespace:
--
@@ -121,6 +167,20 @@ ALTER TABLE ONLY posts
--
+-- Name: aggregatorlog_feed_idx; Type: INDEX; Schema: planet; Owner: -; Tablespace:
+--
+
+CREATE INDEX aggregatorlog_feed_idx ON aggregatorlog USING btree (feed);
+
+
+--
+-- Name: aggregatorlog_feed_ts_idx; Type: INDEX; Schema: planet; Owner: -; Tablespace:
+--
+
+CREATE INDEX aggregatorlog_feed_ts_idx ON aggregatorlog USING btree (feed, ts);
+
+
+--
-- Name: feeds_feddurl; Type: INDEX; Schema: planet; Owner: -; Tablespace:
--
@@ -135,6 +195,14 @@ CREATE INDEX feeds_name ON feeds USING btree (name);
--
+-- Name: aggregatorlog_feed_fkey; Type: FK CONSTRAINT; Schema: planet; Owner: -
+--
+
+ALTER TABLE ONLY aggregatorlog
+ ADD CONSTRAINT aggregatorlog_feed_fkey FOREIGN KEY (feed) REFERENCES feeds(id);
+
+
+--
-- Name: posts_feed_fkey; Type: FK CONSTRAINT; Schema: planet; Owner: -
--

0 comments on commit cf93080

Please sign in to comment.