Rebuild Py3

1 parent ef7fac5 commit 1c9c13c66d19b24a917092757ac5df7c7d6bc109 @gsnedders gsnedders committed May 22, 2012
@@ -193,9 +193,9 @@ def openStream(self, source):
else:
# Otherwise treat source as a string and convert to a file object
if isinstance(source, str):
- # This can error (on invalid characters, thus the need for the argument)
- source = source.encode('utf-32', errors="replace")
- self.charEncoding = ("utf-32", "certain")
+ # XXX: we should handle lone surrogates here
+ source = source.encode('utf-8', errors="replace")
+ self.charEncoding = ("utf-8", "certain")
try:
from io import BytesIO
except:
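
Editor's note: this first hunk changes the fallback used when openStream is handed a text string; the source is now encoded as UTF-8 (with unencodable characters replaced) instead of UTF-32, and the old comment gives way to an XXX about lone surrogates. Below is a minimal, self-contained sketch of that path; open_text_source is an illustrative name, not the real html5lib API.

from io import BytesIO

def open_text_source(source):
    # Encode the text to UTF-8, replacing anything unencodable, and wrap the
    # bytes in a BytesIO so later stages can treat every source as a byte
    # stream with a known ("certain") encoding.
    data = source.encode("utf-8", errors="replace")
    char_encoding = ("utf-8", "certain")
    return BytesIO(data), char_encoding

stream, encoding = open_text_source("<p>caf\u00e9</p>")
assert stream.read() == b"<p>caf\xc3\xa9</p>"
assert encoding == ("utf-8", "certain")
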
@@ -783,9 +783,7 @@ def codecName(encoding):
"""Return the python codec name corresponding to an encoding or None if the
string doesn't correspond to a valid encoding."""
if encoding:
- print(encoding)
canonicalName = ascii_punctuation_re.sub("", encoding).lower()
- print(canonicalName)
return encodings.get(canonicalName, None)
else:
return None
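
Editor's note: the codecName hunk only drops two leftover debugging prints; the lookup itself is unchanged. A self-contained sketch of what that lookup does is below. The punctuation pattern is an approximation and the two-entry encodings dict stands in for html5lib's much larger table.

import re

ascii_punctuation_re = re.compile(r"[\x09-\x0D\x20-\x2F\x3A-\x40\x5B-\x60\x7B-\x7E]")
encodings = {"iso88591": "windows-1252", "utf8": "utf-8"}  # illustrative subset

def codec_name(encoding):
    # Strip ASCII punctuation, lowercase, then look the canonical name up;
    # unknown or empty names come back as None.
    if not encoding:
        return None
    canonical = ascii_punctuation_re.sub("", encoding).lower()
    return encodings.get(canonical, None)

assert codec_name("ISO_8859--1") == "windows-1252"
assert codec_name(None) is None
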
@@ -49,8 +49,8 @@ class HTMLSanitizerMixin(object):
'lang', 'list', 'longdesc', 'loop', 'loopcount', 'loopend',
'loopstart', 'low', 'lowsrc', 'max', 'maxlength', 'media', 'method',
'min', 'multiple', 'name', 'nohref', 'noshade', 'nowrap', 'open',
- 'optimum', 'pattern', 'ping', 'point-size', 'prompt', 'pqg',
- 'radiogroup', 'readonly', 'rel', 'repeat-max', 'repeat-min',
+ 'optimum', 'pattern', 'ping', 'point-size', 'poster', 'pqg', 'preload',
+ 'prompt', 'radiogroup', 'readonly', 'rel', 'repeat-max', 'repeat-min',
'replace', 'required', 'rev', 'rightspacing', 'rows', 'rowspan',
'rules', 'scope', 'selected', 'shape', 'size', 'span', 'src', 'start',
'step', 'style', 'summary', 'suppress', 'tabindex', 'target',
@@ -97,7 +97,7 @@ class HTMLSanitizerMixin(object):
'xml:base', 'xml:lang', 'xml:space', 'xmlns', 'xmlns:xlink', 'y',
'y1', 'y2', 'zoomAndPan']
- attr_val_is_uri = ['href', 'src', 'cite', 'action', 'longdesc',
+ attr_val_is_uri = ['href', 'src', 'cite', 'action', 'longdesc', 'poster',
'xlink:href', 'xml:base']
svg_attr_val_allows_ref = ['clip-path', 'color-profile', 'cursor', 'fill',
@@ -62,9 +62,8 @@
except ImportError:
pass
-def html5lib_test_files(subdirectory, files='*.dat'):
+def get_data_files(subdirectory, files='*.dat'):
return glob.glob(os.path.join(test_dir,subdirectory,files))
-html5lib_test_files.__test__ = False
class DefaultDict(dict):
def __init__(self, default, *args, **kwargs):
@@ -7,7 +7,7 @@
except AttributeError:
unittest.TestCase.assertEqual = unittest.TestCase.assertEquals
-from .support import html5lib_test_files, TestData, test_dir
+from .support import get_data_files, TestData, test_dir
from html5lib import HTMLParser, inputstream
class Html5EncodingTestCase(unittest.TestCase):
@@ -24,7 +24,7 @@ def test_codec_name_d(self):
self.assertEqual(inputstream.codecName("ISO_8859--1"), "windows-1252")
def buildTestSuite():
- for filename in html5lib_test_files("encoding"):
+ for filename in get_data_files("encoding"):
test_name = os.path.basename(filename).replace('.dat',''). \
replace('-','')
tests = TestData(filename, "data")
@@ -7,7 +7,7 @@
warnings.simplefilter("error")
-from .support import html5lib_test_files as data_files
+from .support import get_data_files
from .support import TestData, convert, convertExpected, treeTypes
import html5lib
from html5lib import html5parser, treebuilders, constants
@@ -67,7 +67,7 @@ def runParserTest(innerHTML, input, expected, errors, treeClass,
def test_parser():
sys.stderr.write('Testing tree builders '+ " ".join(list(treeTypes.keys())) + "\n")
- files = data_files('tree-construction')
+ files = get_data_files('tree-construction')
for filename in files:
testName = os.path.basename(filename).replace(".dat","")
@@ -1,6 +1,6 @@
import os
import unittest
-from .support import html5lib_test_files
+from .support import get_data_files
try:
import json
@@ -183,11 +183,12 @@ def testEntityNoResolve(self):
self.assertEqual("""<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&beta;</html>""", result)
def test_serializer():
- for filename in html5lib_test_files('serializer', '*.test'):
- tests = json.load(open(filename))
- test_name = os.path.basename(filename).replace('.test','')
- for index, test in enumerate(tests['tests']):
- xhtml = test.get("xhtml", test["expected"])
- if test_name == 'optionaltags':
- xhtml = None
- yield runSerializerTest, test["input"], test["expected"], xhtml, test.get("options", {})
+ for filename in get_data_files('serializer', '*.test'):
+ with open(filename) as fp:
+ tests = json.load(fp)
+ test_name = os.path.basename(filename).replace('.test','')
+ for index, test in enumerate(tests['tests']):
+ xhtml = test.get("xhtml", test["expected"])
+ if test_name == 'optionaltags':
+ xhtml = None
+ yield runSerializerTest, test["input"], test["expected"], xhtml, test.get("options", {})
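
Editor's note: the serializer tests (and the tokenizer tests further down) now load their JSON fixtures inside a with block, so the file handle is closed as soon as the data is read rather than whenever the garbage collector gets to it. A small, runnable illustration of the pattern follows; the fixture contents are invented for the example.

import json
import os
import tempfile

# Write a tiny fixture shaped like the serializer .test files, then read it
# back through a context manager so the handle is closed deterministically,
# even if json.load raises.
payload = {"tests": [{"input": "<p>x", "expected": "<p>x"}]}
path = os.path.join(tempfile.mkdtemp(), "example.test")
with open(path, "w") as fp:
    json.dump(payload, fp)

with open(path) as fp:
    tests = json.load(fp)
for index, test in enumerate(tests["tests"]):
    xhtml = test.get("xhtml", test["expected"])
    print(index, test["input"], "=>", test["expected"], xhtml)
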
@@ -2,7 +2,6 @@
import sys
import os
-import unittest
import io
import warnings
import re
@@ -12,7 +11,7 @@
except ImportError:
import simplejson as json
-from .support import html5lib_test_files
+from .support import get_data_files
from html5lib.tokenizer import HTMLTokenizer
from html5lib import constants
@@ -124,7 +123,7 @@ def tokensMatch(expectedTokens, receivedTokens, ignoreErrorOrder,
tokens[tokenType][1].append(token)
return tokens["expected"] == tokens["received"]
-def unescape_test(test):
+def unescape(test):
def decode(inp):
return inp.encode("utf-8").decode("unicode-escape")
@@ -139,14 +138,12 @@ def decode(inp):
del token[2][key]
token[2][decode(key)] = decode(value)
return test
-unescape_test.__test__ = False
-
def runTokenizerTest(test):
#XXX - move this out into the setup function
#concatenate all consecutive character tokens into a single token
if 'doubleEscaped' in test:
- test = unescape_test(test)
+ test = unescape(test)
expected = concatenateCharacterTokens(test['output'])
if 'lastStartTag' not in test:
@@ -166,8 +163,7 @@ def runTokenizerTest(test):
"\nreceived:", str(tokens)])
errorMsg = errorMsg
ignoreErrorOrder = test.get('ignoreErrorOrder', False)
- assert tokensMatch(expected, received, ignoreErrorOrder), errorMsg
-
+ assert tokensMatch(expected, received, ignoreErrorOrder, True), errorMsg
def _doCapitalize(match):
return match.group(1).upper()
@@ -179,19 +175,17 @@ def capitalize(s):
s = _capitalizeRe(_doCapitalize, s)
return s
-
-def test_tokenizer():
- for filename in html5lib_test_files('tokenizer', '*.test'):
+def testTokenizer():
+ for filename in get_data_files('tokenizer', '*.test'):
with open(filename) as fp:
tests = json.load(fp)
testName = os.path.basename(filename).replace(".test","")
if 'tests' in tests:
for index,test in enumerate(tests['tests']):
- #Skip tests with a self closing flag
+ #Skip tests with a self closing flag
skip = False
if 'initialStates' not in test:
test["initialStates"] = ["Data state"]
for initialState in test["initialStates"]:
test["initialState"] = capitalize(initialState)
yield runTokenizerTest, test
-
@@ -10,7 +10,7 @@
warnings.simplefilter("error")
-from .support import html5lib_test_files, TestData, convertExpected
+from .support import get_data_files, TestData, convertExpected
from html5lib import html5parser, treewalkers, treebuilders, constants
from html5lib.filters.lint import Filter as LintFilter, LintError
@@ -298,7 +298,7 @@ def test_treewalker():
sys.stdout.write('Testing tree walkers '+ " ".join(list(treeTypes.keys())) + "\n")
for treeName, treeCls in treeTypes.items():
- files = html5lib_test_files('tree-construction')
+ files = get_data_files('tree-construction')
for filename in files:
testName = os.path.basename(filename).replace(".dat","")
@@ -17,7 +17,7 @@ def main(out_path):
sys.stderr.write("Path %s does not exist"%out_path)
sys.exit(1)
- for filename in support.html5lib_test_files('tokenizer', '*.test'):
+ for filename in support.get_data_files('tokenizer', '*.test'):
run_file(filename, out_path)
def run_file(filename, out_path):
