Skip to content

Commit

Permalink
fix(encoding): seek to 0 if no encoding was found
Browse files Browse the repository at this point in the history
Closes #133
  • Loading branch information
rubik committed Sep 7, 2017
1 parent 7b69ac6 commit 7d69ad9
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 16 deletions.
30 changes: 18 additions & 12 deletions radon/cli/tools.py
Expand Up @@ -71,19 +71,21 @@ def _get_normal_name(orig_enc):
def detect_encoding(readline):
"""
The detect_encoding() function is used to detect the encoding that should
be used to decode a Python source file. It requires one argument, readline,
be used to decode a Python source file. It requires one argument, readline,
in the same way as the tokenize() generator.
It will call readline a maximum of twice, and return the encoding used
(as a string) and a list of any lines (left as bytes) it has read in.
It detects the encoding from the presence of a utf-8 bom or an encoding
cookie as specified in pep-0263. If both a bom and a cookie are present,
but disagree, a SyntaxError will be raised. If the encoding cookie is an
invalid charset, raise a SyntaxError. Note that if a utf-8 bom is found,
cookie as specified in pep-0263. If both a bom and a cookie are present,
but disagree, a SyntaxError will be raised. If the encoding cookie is an
invalid charset, raise a SyntaxError. Note that if a utf-8 bom is found,
'utf-8-sig' is returned.
If no encoding is specified, then the default of 'utf-8' will be returned.
The third element of the returned tuple is a boolean indicating whether an
encoding cookie was found.
"""
try:
filename = readline.__self__.name
Expand Down Expand Up @@ -146,19 +148,19 @@ def find_cookie(line):

encoding = find_cookie(first)
if encoding:
return encoding, [first]
return encoding, [first], True
if not blank_re.match(first):
return default, [first]
return default, [first], False

second = read_or_stop()
if not second:
return default, [first]
return default, [first], False

encoding = find_cookie(second)
if encoding:
return encoding, [first, second]
return encoding, [first, second], True

return default, [first, second]
return default, [first, second], False


def _open_function(filename, encoding=None):
Expand All @@ -168,18 +170,22 @@ def _open_function(filename, encoding=None):
# Note: Python 3 uses builtins.open here..
buffer = _io_open_function(filename, 'rb')
try:
encoding, lines = detect_encoding(buffer.readline)
encoding, lines, found = detect_encoding(buffer.readline)
# Note: Python 3's tokenize does buffer seek(0), but that
# leaves the encoding cookie in the file and ast.parse
# does not like Unicode text with an encoding cookie.
buffer.seek(sum(len(line) for line in lines))
# If the encoding was not found we seek to the start anyway
if found:
buffer.seek(sum(len(line) for line in lines))
else:
buffer.seek(0)
text = TextIOWrapper(buffer, encoding, line_buffering=True)
text.mode = 'r'
return text
except:
buffer.close()
raise

else:
_open_function = open

Expand Down
2 changes: 2 additions & 0 deletions radon/tests/data/no_encoding.py
@@ -0,0 +1,2 @@
class Foo(object):
pass
14 changes: 10 additions & 4 deletions radon/tests/test_cli.py
Expand Up @@ -124,11 +124,17 @@ def test_encoding(mocker, log_mock):
target = 'data/__init__.py'
else:
target = 'data/py3unicode.py'
fname = os.path.join(DIRNAME, target)
fnames = [
os.path.join(DIRNAME, target),
# This one will fail if detect_encoding() removes the first lines
# See #133
os.path.join(DIRNAME, 'data/no_encoding.py'),
]
for h_class, cfg in mappings.items():
harvester = h_class([fname], cfg)
assert not any(['error' in kw
for msg, args, kw in harvester.to_terminal()])
for f in fnames:
harvester = h_class([f], cfg)
assert not any(['error' in kw
for msg, args, kw in harvester.to_terminal()])


@pytest.fixture
Expand Down

0 comments on commit 7d69ad9

Please sign in to comment.