[refs #172] Further simplification of encoding handling but adding back exception catching to allow tools to carry on for other files
landscapeio · Aug 3, 2016 · e4dabff · e4dabff
1 parent 3a6480b
commit e4dabff
Showing 1 changed file with 23 additions and 3 deletions.
diff --git a/prospector/encoding.py b/prospector/encoding.py
@@ -3,8 +3,28 @@
 import tokenize
 
 
-def read_py_file(filename):
+class CouldNotHandleEncoding(Exception):
+    """Signals that a source file's encoding could not be handled.
+
+    Both constructor arguments are kept as attributes:
+
+    * ``path``  - the file that was being read
+    * ``cause`` - the underlying exception that triggered this one
+    """
+
+    def __init__(self, path, cause):
+        # Explicit base-class init (Python 2 compatible spelling); ``args``
+        # ends up as ``(path, cause)``.
+        super(CouldNotHandleEncoding, self).__init__(path, cause)
+        self.path = path
+        self.cause = cause
+
+
+def read_py_file(filepath):
+    """Return the text content of the Python source file at *filepath*.
+
+    On Python 2 the file is read in universal-newline mode. On Python 3
+    the encoding is first detected from the BOM / PEP 263 coding cookie
+    (defaulting to UTF-8), and the file is then decoded via
+    ``tokenize.open``.
+
+    Raises:
+        CouldNotHandleEncoding: on Python 3, when encoding detection
+            fails with a ``SyntaxError``; the original error is attached
+            as ``cause``. A ``UnicodeDecodeError`` raised while reading
+            the rest of the file is not converted here.
+    """
     if sys.version_info < (3, ):
-        return open(filename, 'rU').read()
+        # context manager so the handle is closed deterministically
+        with open(filepath, 'rU') as py_file:
+            return py_file.read()
     else:
-        return tokenize.open(filename).read()
+        # see https://docs.python.org/3/library/tokenize.html#tokenize.detect_encoding
+        # first just see if the file is properly encoded; detect_encoding
+        # takes a readline callable over a *binary* stream, not a path
+        try:
+            with open(filepath, 'rb') as raw_file:
+                tokenize.detect_encoding(raw_file.readline)
+        except SyntaxError as err:
+            # a SyntaxError is raised:
+            #   (1) in badly authored files (contains non-utf8 in a comment line)
+            #   (2) a coding is specified, but wrong and
+            #   (3) no coding is specified, and the default
+            #       'utf8' fails to decode.
+            #   (4) the encoding specified by a pep263 declaration did not match
+            #       with the encoding detected by inspecting the BOM
+            raise CouldNotHandleEncoding(filepath, err)
+
+        with tokenize.open(filepath) as py_file:
+            return py_file.read()