Skip to content

Commit

Permalink
lxml fixes
Browse files (browse the repository at this point in the history)
  • Loading branch information
jgraham committed Jan 28, 2008
1 parent 49260a1 commit f51d329
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 8 deletions.
10 changes: 7 additions & 3 deletions python/src/html5lib/treebuilders/etree_lxml.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,13 +170,17 @@ def insertRoot(self, name):
if self.doctype:
docStr += "<!DOCTYPE %s"%self.doctype.name
if self.doctype.publicId is not None:
docStr += "PUBLIC %s"%self.doctype.publicId
docStr += ' PUBLIC "%s"'%self.doctype.publicId
if self.doctype.systemId:
docStr += "SYSTEM %s"%self.doctype.systemId
docStr += ' "%s"'%self.doctype.systemId
docStr += ">"
docStr += "<html></html>"

root = etree.fromstring(docStr)
try:
root = etree.fromstring(docStr)
except etree.XMLSyntaxError:
print docStr
raise

#Create the root document and add the ElementTree to it
self.document = self.documentClass()
Expand Down
19 changes: 14 additions & 5 deletions python/tests/test_parser.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
import os
import sys
import traceback

import StringIO
import unittest
from support import html5lib_test_files, TestData, convert, convertExpected
import warnings

warnings.simplefilter("error")

from support import html5lib_test_files, TestData, convert, convertExpected
from html5lib import html5parser, treebuilders, constants

treeTypes = {"simpletree":treebuilders.getTreeBuilder("simpletree"),
Expand Down Expand Up @@ -34,8 +36,11 @@
pass

try:
import lxml.etree as lxml
treeTypes['lxml'] = treebuilders.getTreeBuilder("etree", lxml, fullTree=True)
try:
import lxml.html as lxml
except ImportError:
import lxml.etree as lxml
treeTypes['lxml'] = treebuilders.getTreeBuilder("lxml", lxml, fullTree=True)
except ImportError:
pass

Expand Down Expand Up @@ -70,7 +75,11 @@ def runParserTest(self, innerHTML, input, expected, errors, treeClass):
if innerHTML:
document = p.parseFragment(StringIO.StringIO(input), innerHTML)
else:
document = p.parse(StringIO.StringIO(input))
try:
document = p.parse(StringIO.StringIO(input))
except constants.DataLossWarning:
sys.stderr.write("Test input causes known dataloss, skipping")
return
except:
errorMsg = "\n".join(["\n\nInput:", input, "\nExpected:", expected,
"\nTraceback:", traceback.format_exc()])
Expand Down

0 comments on commit f51d329

Please sign in to comment.