Skip to content

Commit

Permalink
Catch URLError as well as HTTPError, and don't fail on non-HTML content
Browse files Browse the repository at this point in the history
  • Loading branch information
mhl committed May 1, 2012
1 parent 892d5a3 commit ac6673b
Showing 1 changed file with 14 additions and 5 deletions.
19 changes: 14 additions & 5 deletions delicious-recent-summary
Original file line number Diff line number Diff line change
Expand Up @@ -181,13 +181,22 @@ for row in table:
url = f.geturl()
f.close()
except urllib2.HTTPError, e:
print >> sys.stderr, "Error fetching", original_url, "which was:", unicode(e)
print >> sys.stderr, "HTTPError fetching", unicode(original_url), "which was:", unicode(e)
continue
titles = tree.xpath('.//title')
if titles:
title = titles[0].text
except urllib2.URLError, e:
print >> sys.stderr, "URLError fetching", unicode(original_url), "which was:", unicode(e)
continue
root = tree.getroot()
if root is not None:
titles = root.xpath('.//title')
if titles:
title = titles[0].text
if not title:
title = '[Title was empty]'
else:
title = '[No title was found]'
else:
title = '[No <TITLE> element found]'
title = '[Parsing as HTML failed]'
title = title.strip()
title = re.sub('\s+', ' ', title)
entry = SimpleEntry(row['from_user'],
Expand Down

0 comments on commit ac6673b

Please sign in to comment.