Skip to content

Commit

Permalink
[refs #172] Further simplification of encoding handling but adding back exception catching to allow tools to carry on for other files
Browse files Browse the repository at this point in the history
  • Loading branch information
carlio committed Aug 3, 2016
1 parent 3a6480b commit e4dabff
Showing 1 changed file with 23 additions and 3 deletions.
26 changes: 23 additions & 3 deletions prospector/encoding.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,28 @@
import tokenize


def read_py_file(filename):
class CouldNotHandleEncoding(Exception):
    """Raised when the encoding of a source file cannot be determined or used.

    Attributes:
        path: path of the file that could not be read.
        cause: the underlying exception that triggered this error.
    """

    def __init__(self, path, cause):
        # Forward both values to the base class so ``args`` stays
        # ``(path, cause)``; pickling and copying rebuild the exception
        # by calling the class with ``args``.
        super(CouldNotHandleEncoding, self).__init__(path, cause)
        self.path = path
        self.cause = cause

    def __str__(self):
        # Readable message instead of the default tuple repr of ``args``.
        return "Could not handle the encoding of %s: %s" % (self.path, self.cause)


def read_py_file(filepath):
    """Return the contents of the Python source file at *filepath*.

    On Python 2 the file is read in universal-newline mode and returned
    without decoding. On Python 3 the source encoding is detected from the
    BOM or PEP 263 coding declaration and the decoded text is returned.

    Raises:
        CouldNotHandleEncoding: on Python 3, if the encoding cannot be
            detected or the file cannot be decoded with it. The original
            exception is available as the ``cause`` attribute.
    """
    if sys.version_info < (3, ):
        with open(filepath, 'rU') as py_file:
            return py_file.read()

    # see https://docs.python.org/3/library/tokenize.html#tokenize.detect_encoding
    # first just see if the file is properly encoded
    try:
        # detect_encoding expects a readline callable yielding bytes,
        # not a path, so the file has to be opened in binary mode first.
        with open(filepath, 'rb') as raw_file:
            tokenize.detect_encoding(raw_file.readline)
    except SyntaxError as err:
        # this error is raised:
        # (1) in badly authored files (contains non-utf8 in a comment line)
        # (2) a coding is specified, but wrong and
        # (3) no coding is specified, and the default
        #     'utf8' fails to decode.
        # (4) the encoding specified by a pep263 declaration did not match
        #     with the encoding detected by inspecting the BOM
        raise CouldNotHandleEncoding(filepath, err)

    try:
        with tokenize.open(filepath) as py_file:
            return py_file.read()
    except UnicodeDecodeError as err:
        # detect_encoding only inspects the BOM and the first two lines, so
        # undecodable bytes further down only surface while reading.
        raise CouldNotHandleEncoding(filepath, err)

0 comments on commit e4dabff

Please sign in to comment.