Permalink
Browse files

Welcome to Python 3.

We now fail the same three tests on both Py2 and Py3. I'm fairly certain the
meta-preparser, among other things, is broken on Py3, but we have no tests for
it. (We should fix that.)
  • Loading branch information...
1 parent dbcb5fc commit 74f0ad9cf806f4e1c2fd777406286cd0d1cc8424 @gsnedders gsnedders committed May 22, 2012
Showing with 10,047 additions and 9,021 deletions.
  1. +1 −1 README
  2. +0 −145 html5-tests.patch
  3. +4 −5 {src → }/html5lib/__init__.py
  4. +3,085 −0 html5lib/constants.py
  5. 0 {src → }/html5lib/filters/__init__.py
  6. 0 {src → }/html5lib/filters/_base.py
  7. +9 −10 {src → }/html5lib/filters/inject_meta_charset.py
  8. 0 {src → }/html5lib/filters/lint.py
  9. +8 −6 {src → }/html5lib/filters/optionaltags.py
  10. 0 {src → }/html5lib/filters/sanitizer.py
  11. 0 {src → }/html5lib/filters/whitespace.py
  12. +2,721 −0 html5lib/html5parser.py
  13. +29 −22 {src → }/html5lib/ihatexml.py
  14. +256 −201 {src → }/html5lib/inputstream.py
  15. +99 −69 {src → }/html5lib/sanitizer.py
  16. +17 −0 html5lib/serializer/__init__.py
  17. +318 −0 html5lib/serializer/htmlserializer.py
  18. 0 {src → }/html5lib/serializer/xhtmlserializer.py
  19. BIN html5lib/tests/.coverage
  20. +5 −0 html5lib/tests/README
  21. +12 −0 html5lib/tests/__init__.py
  22. +3 −3 { → html5lib}/tests/mockParser.py
  23. +2 −2 { → html5lib}/tests/performance/concatenation.py
  24. 0 { → html5lib}/tests/runparsertests.py
  25. +0 −6 { → html5lib}/tests/runtests.py
  26. +29 −19 { → html5lib}/tests/support.py
  27. +19 −9 { → html5lib}/tests/test_encoding.py
  28. +89 −0 html5lib/tests/test_parser.py
  29. +18 −1 { → html5lib}/tests/test_parser2.py
  30. +76 −0 html5lib/tests/test_sanitizer.py
  31. +193 −0 html5lib/tests/test_serializer.py
  32. +29 −26 { → html5lib}/tests/test_stream.py
  33. +86 −59 { → html5lib}/tests/test_tokenizer.py
  34. +78 −73 { → html5lib}/tests/test_treewalkers.py
  35. +128 −0 html5lib/tests/test_whitespace_filter.py
  36. +64 −0 html5lib/tests/tokenizertotree.py
  37. 0 { → html5lib}/tests/us-ascii.html
  38. 0 { → html5lib}/tests/utf-8-bom.html
  39. +1,746 −0 html5lib/tokenizer.py
  40. +18 −7 {src → }/html5lib/treebuilders/__init__.py
  41. +69 −30 {src → }/html5lib/treebuilders/_base.py
  42. +72 −26 {src → }/html5lib/treebuilders/dom.py
  43. +78 −24 {src → }/html5lib/treebuilders/etree.py
  44. +71 −34 {src → }/html5lib/treebuilders/etree_lxml.py
  45. +42 −11 {src → }/html5lib/treebuilders/simpletree.py
  46. +55 −22 {src → }/html5lib/treebuilders/soup.py
  47. +0 −6 {src → }/html5lib/treewalkers/__init__.py
  48. +44 −24 {src → }/html5lib/treewalkers/_base.py
  49. +9 −2 {src → }/html5lib/treewalkers/dom.py
  50. +25 −4 {src → }/html5lib/treewalkers/etree.py
  51. +14 −11 {src → }/html5lib/treewalkers/genshistream.py
  52. +186 −168 {src → }/html5lib/treewalkers/lxmletree.py
  53. +12 −4 {src → }/html5lib/treewalkers/pulldom.py
  54. +8 −2 {src → }/html5lib/treewalkers/simpletree.py
  55. +9 −8 {src → }/html5lib/treewalkers/soup.py
  56. +20 −1 {src → }/html5lib/utils.py
  57. +63 −50 parse.py
  58. +8 −13 setup.py
  59. +0 −40 setup_base.py
  60. +0 −1,111 src/html5lib/constants.py
  61. +0 −127 src/html5lib/filters/formfiller.py
  62. +0 −749 src/html5lib/filters/iso639codes.py
  63. +0 −29 src/html5lib/filters/rfc2046.py
  64. +0 −79 src/html5lib/filters/rfc3987.py
  65. +0 −716 src/html5lib/filters/validator.py
  66. +0 −2,262 src/html5lib/html5parser.py
  67. +0 −156 src/html5lib/liberalxmlparser.py
  68. +0 −3 src/html5lib/serializer/__init__.py
  69. +0 −221 src/html5lib/serializer/htmlserializer.py
  70. +0 −1,151 src/html5lib/tokenizer.py
  71. +0 −3 tests/README
  72. +0 −1 tests/__init__.py
  73. +0 −296 tests/test_formfiller.py
  74. +0 −243 tests/test_lxp.py
  75. +0 −166 tests/test_parser.py
  76. +0 −91 tests/test_sanitizer.py
  77. +0 −115 tests/test_sax.py
  78. +0 −95 tests/test_serializer.py
  79. +0 −32 tests/test_validator.py
  80. +0 −123 tests/test_whitespace_filter.py
  81. +88 −0 utils/entities.py
  82. +0 −13 utils/extract-entities.py
  83. +2 −2 utils/iana_parse.py
  84. +18 −12 utils/package.py
  85. +12 −12 utils/spider.py
  86. +0 −69 validate.py
View
2 README
@@ -11,7 +11,7 @@ html5lib is packaged with distutils. To install it use:
You may wish to check that your installation has been a success by
running the testsuite. All the tests can be run by invoking
-runtests.py in the tests/ directory
+runtests.py in the html5lib/tests/ directory
= Usage =
View
@@ -1,145 +0,0 @@
-Index: src/html5lib/treebuilders/dom.py
-===================================================================
---- src/html5lib/treebuilders/dom.py (revision 1118)
-+++ src/html5lib/treebuilders/dom.py (working copy)
-@@ -137,16 +137,21 @@
- rv = []
- def serializeElement(element, indent=0):
- if element.nodeType == Node.DOCUMENT_TYPE_NODE:
-- if element.name:
-- rv.append("|%s<!DOCTYPE %s>"%(' '*indent, element.name))
-+ if element.publicId:
-+ if element.systemId:
-+ rv.append('|%s<!DOCTYPE %s PUBLIC "%s" "%s">'%(' '*indent, element.name or "", element.publicId, element.systemId))
-+ else:
-+ rv.append('|%s<!DOCTYPE %s PUBLIC "%s">'%(' '*indent, element.name or "", element.publicId))
-+ elif element.systemId:
-+ rv.append('|%s<!DOCTYPE %s SYSTEM "%s">'%(' '*indent, element.name or "", element.systemId))
- else:
-- rv.append("|%s<!DOCTYPE >"%(' '*indent,))
-+ rv.append("|%s<!DOCTYPE %s>"%(' '*indent, element.name or ""))
- elif element.nodeType == Node.DOCUMENT_NODE:
- rv.append("#document")
- elif element.nodeType == Node.DOCUMENT_FRAGMENT_NODE:
- rv.append("#document-fragment")
- elif element.nodeType == Node.COMMENT_NODE:
-- rv.append("|%s<!-- %s -->"%(' '*indent, element.nodeValue))
-+ rv.append("|%s<!--%s-->"%(' '*indent, element.nodeValue))
- elif element.nodeType == Node.TEXT_NODE:
- rv.append("|%s\"%s\"" %(' '*indent, element.nodeValue))
- else:
-Index: src/html5lib/treebuilders/etree.py
-===================================================================
---- src/html5lib/treebuilders/etree.py (revision 1118)
-+++ src/html5lib/treebuilders/etree.py (working copy)
-@@ -169,7 +169,15 @@
- if not(hasattr(element, "tag")):
- element = element.getroot()
- if element.tag == "<!DOCTYPE>":
-- rv.append("|%s<!DOCTYPE %s>"%(' '*indent, element.text))
-+ if element.get("publicId"):
-+ if element.get("systemId"):
-+ rv.append('|%s<!DOCTYPE %s PUBLIC "%s" "%s">'%(' '*indent, element.text, element.get("publicId"), element.get("systemId")))
-+ else:
-+ rv.append('|%s<!DOCTYPE %s PUBLIC "%s">'%(' '*indent, element.text, element.get("publicId")))
-+ elif element.get("systemId"):
-+ rv.append('|%s<!DOCTYPE %s SYSTEM "%s">'%(' '*indent, element.text, element.get("systemId")))
-+ else:
-+ rv.append("|%s<!DOCTYPE %s>"%(' '*indent, element.text))
- elif element.tag == "<DOCUMENT_ROOT>":
- rv.append("#document")
- if element.text:
-@@ -177,7 +185,7 @@
- if element.tail:
- finalText = element.tail
- elif type(element.tag) == type(ElementTree.Comment):
-- rv.append("|%s<!-- %s -->"%(' '*indent, element.text))
-+ rv.append("|%s<!--%s-->"%(' '*indent, element.text))
- else:
- rv.append("|%s<%s>"%(' '*indent, element.tag))
- if hasattr(element, "attrib"):
-Index: src/html5lib/treebuilders/etree_lxml.py
-===================================================================
---- src/html5lib/treebuilders/etree_lxml.py (revision 1118)
-+++ src/html5lib/treebuilders/etree_lxml.py (working copy)
-@@ -50,12 +50,19 @@
- if not hasattr(element, "tag"):
- rv.append("#document")
- if element.docinfo.internalDTD:
-- if not (element.docinfo.public_id or element.docinfo.system_url):
-+ if element.docinfo.public_id:
-+ if element.docinfo.system_url:
-+ dtd_str = """<!DOCTYPE %s PUBLIC "%s" "%s">"""%(
-+ element.docinfo.root_name, element.docinfo.public_id,
-+ element.docinfo.system_url)
-+ else:
-+ dtd_str = """<!DOCTYPE %s PUBLIC "%s">"""%(
-+ element.docinfo.root_name, element.docinfo.public_id)
-+ elif element.docinfo.system_url:
-+ dtd_str = """<!DOCTYPE %s SYSTEM "%s">"""%(
-+ element.docinfo.root_name, element.docinfo.system_url)
-+ else:
- dtd_str = "<!DOCTYPE %s>"%element.docinfo.root_name
-- else:
-- dtd_str = """<!DOCTYPE %s PUBLIC "%s" "%s">"""%(
-- element.docinfo.root_name, element.docinfo.public_id,
-- element.docinfo.system_url)
- rv.append("|%s%s"%(' '*(indent+2), dtd_str))
- next_element = element.getroot()
- while next_element.getprevious() is not None:
-@@ -64,7 +71,7 @@
- serializeElement(next_element, indent+2)
- next_element = next_element.getnext()
- elif type(element.tag) == type(etree.Comment):
-- rv.append("|%s<!-- %s -->"%(' '*indent, element.text))
-+ rv.append("|%s<!--%s-->"%(' '*indent, element.text))
- else:
- rv.append("|%s<%s>"%(' '*indent, element.tag))
- if hasattr(element, "attrib"):
-Index: src/html5lib/treebuilders/simpletree.py
-===================================================================
---- src/html5lib/treebuilders/simpletree.py (revision 1118)
-+++ src/html5lib/treebuilders/simpletree.py (working copy)
-@@ -112,7 +112,15 @@
- self.systemId = u""
-
- def __unicode__(self):
-- return u"<!DOCTYPE %s>" % self.name
-+ if self.publicId:
-+ if self.systemId:
-+ return u'<!DOCTYPE %s PUBLIC "%s" "%s">'%(self.name, self.publicId, self.systemId)
-+ else:
-+ return u'<!DOCTYPE %s PUBLIC "%s">'%(self.name, self.publicId)
-+ elif self.systemId:
-+ return u'<!DOCTYPE %s SYSTEM "%s">'%(self.name, self.systemId)
-+ else:
-+ return u"<!DOCTYPE %s>"%self.name
-
- toxml = __unicode__
-
-@@ -186,7 +194,7 @@
- self.data = data
-
- def __unicode__(self):
-- return "<!-- %s -->" % self.data
-+ return u"<!--%s-->" % self.data
-
- def toxml(self):
- return "<!--%s-->" % self.data
-Index: tests/support.py
-===================================================================
---- tests/support.py (revision 1118)
-+++ tests/support.py (working copy)
-@@ -11,10 +11,10 @@
-
- #Define the location of the tests as this changes in release versions
- #RELEASE remove
--test_dir = os.path.join(os.path.pardir,os.path.pardir,'testdata')
-+test_dir = os.path.join(os.path.pardir,os.path.pardir,os.path.pardir,'html5-tests')
- #END RELEASE
- #RELEASE add
--#test_dir = './testdata'
-+#test_dir = './html5-tests'
- #END RELEASE
-
- try:
@@ -10,9 +10,8 @@
f = open("my_document.html")
tree = html5lib.parse(f)
"""
-
-from .html5parser import HTMLParser, parse
+__version__ = "0.95-dev"
+from .html5parser import HTMLParser, parse, parseFragment
from .treebuilders import getTreeBuilder
-
-#from .liberalxmlparser import XMLParser, XHTMLParser
-
+from .treewalkers import getTreeWalker
+from .serializer import serialize
Oops, something went wrong.

0 comments on commit 74f0ad9

Please sign in to comment.