Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Improved handling of the duplication problem, where duplicate articles with
different time stamps were showing up.
  • Loading branch information
myano committed May 6, 2012
1 parent aafe33a commit 981740f
Showing 1 changed file with 11 additions and 5 deletions.
16 changes: 11 additions & 5 deletions modules/rss.py
Expand Up @@ -21,6 +21,7 @@
socket.setdefaulttimeout(10) socket.setdefaulttimeout(10)
INTERVAL = 60 # seconds between checking for new updates INTERVAL = 60 # seconds between checking for new updates
STOP = False STOP = False
dupes = dict()




def manage_rss(jenni, input): def manage_rss(jenni, input):
Expand Down Expand Up @@ -87,7 +88,7 @@ def manage_rss(jenni, input):
k = 0 k = 0
for row in c: for row in c:
k += 1 k += 1
jenni.say(unicode(row)) jenni.say("list: " + unicode(row))
if k == 0: if k == 0:
jenni.reply("No entries in database") jenni.reply("No entries in database")
else: else:
Expand Down Expand Up @@ -116,13 +117,12 @@ def read_feeds(jenni):
feed_channel = row[0] feed_channel = row[0]
feed_site_name = row[1] feed_site_name = row[1]
feed_url = row[2] feed_url = row[2]
feed_modified = row[3]
feed_fg = row[4] feed_fg = row[4]
feed_bg = row[5] feed_bg = row[5]
try: try:
fp = feedparser.parse(feed_url) fp = feedparser.parse(feed_url)
except IOError, E: except IOError, E:
jenni.say(str(E)) jenni.say("Can't parse, " + str(E))
try: try:
entry = fp.entries[0] entry = fp.entries[0]


Expand All @@ -133,7 +133,13 @@ def read_feeds(jenni):
elif feed_fg and feed_bg: elif feed_fg and feed_bg:
site_name_effect = "[\x02\x03%s,%s%s\x03\x02]" % (feed_fg, feed_bg, feed_site_name) site_name_effect = "[\x02\x03%s,%s%s\x03\x02]" % (feed_fg, feed_bg, feed_site_name)


if not feed_modified == entry.updated: #if not feed_modified == entry.updated:
if feed_channel not in dupes:
dupes[feed_channel] = dict()
if feed_site_name not in dupes[feed_channel]:
dupes[feed_channel][feed_site_name] = list()
if entry.title not in dupes[feed_channel][feed_site_name]:
dupes[feed_channel][feed_site_name].append(entry.title)
if entry.id: if entry.id:
article_url = entry.id article_url = entry.id
elif entry.feedburner_origlink: elif entry.feedburner_origlink:
Expand All @@ -142,6 +148,7 @@ def read_feeds(jenni):
article_url = entry.links[0].href article_url = entry.links[0].href


short_url = url_module.short(article_url) short_url = url_module.short(article_url)

if short_url: if short_url:
short_url = short_url[0][1][:-1] short_url = short_url[0][1][:-1]
else: else:
Expand All @@ -164,7 +171,6 @@ def read_feeds(jenni):
if DEBUG: if DEBUG:
jenni.say(str(E)) jenni.say(str(E))
c.close() c.close()
conn.close()




def startrss(jenni, input): def startrss(jenni, input):
Expand Down

0 comments on commit 981740f

Please sign in to comment.