Skip to content

Commit

Permalink
Store the Last-Modified time as received from the webserver.
Browse files Browse the repository at this point in the history
Use this for If-Modified-Since, instead of just using the timestamp
of the latest blog post found.

This should keep us from pulling the full feed from any blog that
has modified the contents of a post (or comments) without updating
the post date/GUID.
  • Loading branch information
mhagander committed Aug 24, 2010
1 parent 4b2b2d0 commit 5698f2c
Showing 1 changed file with 14 additions and 2 deletions.
16 changes: 14 additions & 2 deletions aggregator.py
Expand Up @@ -93,8 +93,20 @@ def ParseFeed(self, feedinfo):
guidisperma = True guidisperma = True
if self.StoreEntry(feedinfo[0], entry.id, entry.date, entry.link, guidisperma, entry.title, txt) > 0: if self.StoreEntry(feedinfo[0], entry.id, entry.date, entry.link, guidisperma, entry.title, txt) > 0:
numadded += 1 numadded += 1
if numadded > 0:
self.db.cursor().execute("UPDATE planet.feeds SET lastget=COALESCE((SELECT max(dat) FROM planet.posts WHERE planet.posts.feed=planet.feeds.id),'2000-01-01') WHERE planet.feeds.id=%(feed)s", {'feed': feedinfo[0]}) # Check if we got back a Last-Modified time
if hasattr(feed, 'modified') and feed['modified']:
# Last-Modified header retrieved. If we did receive it, we will
# trust the content (assuming we can parse it)
self.db.cursor().execute("UPDATE planet.feeds SET lastget=%(date)s WHERE id=%(feed)s AND NOT lastget=%(date)s", { 'date': datetime.datetime(*feed['modified'][:6]), 'feed': feedinfo[0]})
else:
# We didn't get a Last-Modified time, so set it to the entry date
# for the latest entry in this feed. Only do this if we added at
# least one new entry.
if numadded > 0:
self.db.cursor().execute("UPDATE planet.feeds SET lastget=COALESCE((SELECT max(dat) FROM planet.posts WHERE planet.posts.feed=planet.feeds.id),'2000-01-01') WHERE planet.feeds.id=%(feed)s", {'feed': feedinfo[0]})

# Return the number of entries we actually added
return numadded return numadded


def matches_filter(self, entry): def matches_filter(self, entry):
Expand Down

0 comments on commit 5698f2c

Please sign in to comment.