fix(encoding): seek to 0 if no encoding was found

Closes #133
rubik · Sep 7, 2017 · 7d69ad9 · 7d69ad9
1 parent 7b69ac6
commit 7d69ad9
Show file tree

Hide file tree

Showing 3 changed files with 30 additions and 16 deletions.
diff --git a/radon/cli/tools.py b/radon/cli/tools.py
@@ -71,19 +71,21 @@ def _get_normal_name(orig_enc):
         def detect_encoding(readline):
             """
             The detect_encoding() function is used to detect the encoding that should
-            be used to decode a Python source file.  It requires one argument, readline,
+            be used to decode a Python source file. It requires one argument, readline,
             in the same way as the tokenize() generator.
 
             It will call readline a maximum of twice, and return the encoding used
             (as a string) and a list of any lines (left as bytes) it has read in.
 
             It detects the encoding from the presence of a utf-8 bom or an encoding
-            cookie as specified in pep-0263.  If both a bom and a cookie are present,
-            but disagree, a SyntaxError will be raised.  If the encoding cookie is an
-            invalid charset, raise a SyntaxError.  Note that if a utf-8 bom is found,
+            cookie as specified in pep-0263. If both a bom and a cookie are present,
+            but disagree, a SyntaxError will be raised. If the encoding cookie is an
+            invalid charset, raise a SyntaxError. Note that if a utf-8 bom is found,
             'utf-8-sig' is returned.
 
             If no encoding is specified, then the default of 'utf-8' will be returned.
+            The third element of the returned tuple indicates whether an
+            encoding cookie was found.
             """
             try:
                 filename = readline.__self__.name
@@ -146,19 +148,19 @@ def find_cookie(line):
 
             encoding = find_cookie(first)
             if encoding:
-                return encoding, [first]
+                return encoding, [first], True
             if not blank_re.match(first):
-                return default, [first]
+                return default, [first], False
 
             second = read_or_stop()
             if not second:
-                return default, [first]
+                return default, [first], False
 
             encoding = find_cookie(second)
             if encoding:
-                return encoding, [first, second]
+                return encoding, [first, second], True
 
-            return default, [first, second]
+            return default, [first, second], False
 
 
         def _open_function(filename, encoding=None):
@@ -168,18 +170,22 @@ def _open_function(filename, encoding=None):
             # Note: Python 3 uses builtins.open here..
             buffer = _io_open_function(filename, 'rb')
             try:
-                encoding, lines = detect_encoding(buffer.readline)
+                encoding, lines, found = detect_encoding(buffer.readline)
                 # Note: Python 3's tokenize does buffer seek(0), but that
                 # leaves the encoding cookie in the file and ast.parse
                 # does not like Unicode text with an encoding cookie.
-                buffer.seek(sum(len(line) for line in lines))
+                # If no encoding cookie was found, rewind to the start so no lines are skipped
+                if found:
+                    buffer.seek(sum(len(line) for line in lines))
+                else:
+                    buffer.seek(0)
                 text = TextIOWrapper(buffer, encoding, line_buffering=True)
                 text.mode = 'r'
                 return text
             except:
                 buffer.close()
                 raise
-        
+
     else:
         _open_function = open
 

diff --git a/radon/tests/data/no_encoding.py b/radon/tests/data/no_encoding.py
@@ -0,0 +1,2 @@
+class Foo(object):
+    pass
diff --git a/radon/tests/test_cli.py b/radon/tests/test_cli.py
@@ -124,11 +124,17 @@ def test_encoding(mocker, log_mock):
         target = 'data/__init__.py'
     else:
         target = 'data/py3unicode.py'
-    fname = os.path.join(DIRNAME, target)
+    fnames = [
+        os.path.join(DIRNAME, target),
+        # This one will fail if detect_encoding() removes the first lines
+        # See #133
+        os.path.join(DIRNAME, 'data/no_encoding.py'),
+    ]
     for h_class, cfg in mappings.items():
-        harvester = h_class([fname], cfg)
-        assert not any(['error' in kw
-                        for msg, args, kw in harvester.to_terminal()])
+        for f in fnames:
+            harvester = h_class([f], cfg)
+            assert not any(['error' in kw
+                            for msg, args, kw in harvester.to_terminal()])
 
 
 @pytest.fixture