From 74b8431ba7a2a124af3282c8d7efa2f87355d597 Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Wed, 25 May 2011 22:14:00 +0000 Subject: [PATCH] Fixed a problem with the HTML as of the latest changes in the forums. --- brink.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/brink.py b/brink.py index 763437d..d23ec31 100644 --- a/brink.py +++ b/brink.py @@ -15,6 +15,7 @@ import cookielib import html5lib import logging +from lxml import etree from twitter_text import TwitterText from datetime import datetime, timedelta from urlparse import urljoin @@ -388,6 +389,8 @@ def move_table_contents_out(node): parent.remove(node) def transform(node): + if node.tag is etree.Comment: + return node if '}' in node.tag: node.tag = node.tag.split('}')[-1] if node.tag == 'a': @@ -407,7 +410,8 @@ def transform(node): node.attrib.pop('cellspacing', None) elif node.tag == 'div': if node.getchildren(): - div_children = node.getchildren() + div_children = [x for x in node.getchildren() + if x.tag is not etree.Comment] if len(div_children) >= 2 and \ div_children[0].tag.split('}')[-1] == 'div' and \ div_children[0].text and \