Skip to content
This repository has been archived by the owner on Aug 14, 2019. It is now read-only.

Commit

Permalink
fix overflowerror
Browse files Browse the repository at this point in the history
  • Loading branch information
romain-dartigues committed Oct 2, 2018
1 parent c86b093 commit ed1f765
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 54 deletions.
2 changes: 1 addition & 1 deletion characterentities/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def fixup(m):
return unichr(int(text[3:-1], 16))
else:
return unichr(int(text[2:-1]))
except ValueError:
except (OverflowError, ValueError):
pass
else:
try:
Expand Down
40 changes: 1 addition & 39 deletions characterentities/tools/charref.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
# stdlib
import logging
import optparse
import os
import re
import sys

Expand Down Expand Up @@ -59,16 +58,6 @@
}


def get_source(name):
    '''
    Open *name* for reading, whether it is a local path or a URL.

    :param str name: plain path, ``file:`` URL, or another URL handed
        unchanged to ``urlopen``
    :return: open file-like object; the caller is responsible for closing it
    '''
    # NOTE(review): ``_r_url`` is defined outside this block; presumably a
    # regex ``match`` exposing ``scheme`` and ``loc`` groups -- confirm, and
    # confirm it cannot return None for unexpected input.
    m = _r_url(name).groupdict()
    if m['scheme'] in (None, 'file'):
        # Binary mode: ``fetch`` copies the chunks read from here into a
        # file opened with 'wb', so no newline translation must happen.
        return open(m['loc'], 'rb')
    return urlopen(name)


def get_dumper(name):
conf = DUMPER.get(name, {})
dumper = getattr(__import__(name), conf.get('dump', 'dumps'))
Expand Down Expand Up @@ -102,10 +91,7 @@ def main(args=None):

dumper = get_dumper(opt.format)

fobj = fetch(
'https://dev.w3.org/html5/html-author/charref',
'/tmp/charref',
)
fobj = urlopen(opt.input)
document = parse_html5(fobj)
table = extract(document)

Expand All @@ -114,30 +100,6 @@ def main(args=None):



def fetch(src, dst='charref.html', force=False):
    '''
    Return the content of *src* as a file object, caching it at *dst*.

    The source is only downloaded when *dst* cannot be stat'ed, is empty,
    or *force* is true; otherwise the existing cache is reused as is.

    :param str src: URL to fetch
    :param str dst: output for cache
    :param bool force: force download
    :rtype: file
    '''
    try:
        size = os.stat(dst).st_size
    except OSError:
        # cannot stat the cache (typically: it does not exist yet)
        size = None
    if force or not size:
        # Open the source before touching *dst* so that an unreachable
        # source does not truncate an existing cache.
        fsrc = get_source(src)
        complete = False
        try:
            with open(dst, 'wb') as fdst:
                while True:
                    chunk = fsrc.read(4096)
                    if not chunk:
                        break
                    fdst.write(chunk)
            complete = True
        finally:
            if not complete:
                # A partial download has a non-zero size and would pass for
                # a valid cache on the next call: drop it.
                try:
                    os.remove(dst)
                except OSError:
                    pass
            # always release the source, even when the copy failed
            fsrc.close()
    return open(dst)



def extract(etree):
table = {}
for tr in etree.xpath('//tr'):
Expand Down
13 changes: 10 additions & 3 deletions tests/test_characterentities.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,15 +86,22 @@ def create_assert_equal(cls, name, func, src, dst):
zip(*[('&{};'.format(k), unichr(v)) for k, v in name2codepoint.items()])
)

maxunicodeoverflow_dechex = '&#{0};&#x{0:x};'.format(sys.maxunicode + 1)
int32t_dechex = '&#{0};&#x{0:x};'.format((2<<30)-1)
int32t_overflow_dechex = '&#{0};&#x{0:x};'.format(2<<30)
data_decode_map = (
('ascii_uppercase', chars['&#41_5a'], string.ascii_uppercase),
('ascii_lowercase', chars['&#61_7a'], string.ascii_lowercase),
('ascii_all_decent', chars['&#0_ff'], chars['x00_ff'].decode('latin1')),
('ascii_all_hexent', chars['&#x0_ff'], chars['x00_ff'].decode('latin1')),
('known_entities', know_entities_ref, know_entities),
('cover_unknown_entity', '&foo;&bar;', '&foo;&bar;')
('cover_unknown_dec', '&#ff;', '')
('cover_unknown_hex', '&#xyz;&#xfffffff;', '')
('cover_unknown_entity', '&foo;&bar;', '&foo;&bar;'),
('cover_unknown_dec', '&#ff;', '&#ff;'),
('cover_unknown_hex', '&#xyz;&#xfffffff;', '&#xyz;&#xfffffff;'),
('maxunicode', '&#{0};&#x{0:x};'.format(sys.maxunicode), unichr(sys.maxunicode) * 2),
('maxunicode_over', maxunicodeoverflow_dechex, maxunicodeoverflow_dechex),
('unicode_int32t', int32t_dechex, int32t_dechex),
('int32t_overflow', int32t_overflow_dechex, int32t_overflow_dechex),
)

# to test: unichr(sys.maxunicode + 1) unichr(sys.maxint)
Expand Down
22 changes: 11 additions & 11 deletions tests/tools/test_charref.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,14 +100,14 @@ def test_invalid(self):
# expected,
# )

for i, uri in enumerate((
'file:///dev/null',
'/dev/null',
# 'http://github.com/',
)):
GetSource.create_assert_create(
'True',
'open_{}'.format(i),
lambda x: hasattr(charref.get_source(x), 'fileno'),
uri,
)
# for i, uri in enumerate((
# 'file:///dev/null',
# '/dev/null',
# # 'http://github.com/',
# )):
# GetSource.create_assert_create(
# 'True',
# 'open_{}'.format(i),
# lambda x: hasattr(charref.get_source(x), 'fileno'),
# uri,
# )

0 comments on commit ed1f765

Please sign in to comment.