Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Fix UnicodeEncodeError on file encoding detection
If the first line of a Python file is not a valid latin-1 string,
parse_encoding dies with "UnicodeEncodeError". These strings nonetheless can be
valid in some scenarios (for example, the Mako extractor uses
babel.messages.extract.extract_python), and it makes more sense to ignore this
exception and return None.
  • Loading branch information
imankulov committed Jan 4, 2016
1 parent 4f1e0c5 commit f9b04a5
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 1 deletion.
2 changes: 1 addition & 1 deletion babel/util.py
Expand Up @@ -65,7 +65,7 @@ def parse_encoding(fp):
try:
import parser
parser.suite(line1.decode('latin-1'))
except (ImportError, SyntaxError):
except (ImportError, SyntaxError, UnicodeEncodeError):
# Either it's a real syntax error, in which case the source is
# not valid python source, or line2 is a continuation of line1,
# in which case we don't want to scan line2 for a magic
Expand Down
15 changes: 15 additions & 0 deletions tests/test_util.py
Expand Up @@ -14,6 +14,7 @@
import unittest

from babel import util
from babel._compat import BytesIO


def test_distinct():
Expand Down Expand Up @@ -52,3 +53,17 @@ def test_zone_zero_offset(self):
def test_zone_positive_offset(self):
    """``FixedOffsetTimezone(330)`` must expose ``'Etc/GMT+330'`` as ``zone``."""
    tz = util.FixedOffsetTimezone(330)
    self.assertEqual('Etc/GMT+330', tz.zone)


def parse_encoding(s):
    """Run ``util.parse_encoding`` over the text *s*.

    The text is encoded as UTF-8 and wrapped in a ``BytesIO`` so that it can
    be handed to ``util.parse_encoding`` as a file-like object.

    :param s: the source text to inspect
    :return: whatever ``util.parse_encoding`` returns for that input
    """
    # A named ``def`` (rather than a lambda bound to a name) gives the helper
    # a real ``__name__`` in tracebacks and room for this docstring.
    return util.parse_encoding(BytesIO(s.encode('utf-8')))


def test_parse_encoding_defined():
    """A ``# coding: utf-8`` magic comment is reported as ``'utf-8'``."""
    detected = parse_encoding(u'# coding: utf-8')
    assert detected == 'utf-8'


def test_parse_encoding_undefined():
    """Empty input carries no encoding declaration, so ``None`` is expected."""
    detected = parse_encoding(u'')
    assert detected is None


def test_parse_encoding_non_ascii():
    """A non-ASCII first line without a magic comment yields ``None``.

    Guards against the encoding detection raising instead of returning.
    """
    detected = parse_encoding(u'K\xf6ln')
    assert detected is None

0 comments on commit f9b04a5

Please sign in to comment.