# Residue of the GitHub web view this file was copied from (not part of
# the module): Branch: master; 256 lines (209 sloc), 9.22 KB.
"""
Test the implementation of PEP 540: the UTF-8 Mode.
"""
import locale
import sys
import textwrap
import unittest
from test import support
from test.support.script_helper import assert_python_ok, assert_python_failure
# True when running on Windows.
MS_WINDOWS = sys.platform == 'win32'
# Locale names treated as the POSIX locale by the tests.
POSIX_LOCALES = ('C', 'POSIX')
# True when running on VxWorks.
VXWORKS = sys.platform == "vxworks"
class UTF8ModeTests(unittest.TestCase):
    """Check the UTF-8 Mode by running child Python interpreters.

    Each test starts a child interpreter through get_output() and
    inspects what the child printed.
    """

    # Environment variables set for every child process. Keyword arguments
    # given to get_output() override these entries.
    DEFAULT_ENV = {
        'PYTHONUTF8': '',
        'PYTHONLEGACYWINDOWSFSENCODING': '',
        'PYTHONCOERCECLOCALE': '0',
    }

    def posix_locale(self):
        """Return True if the current LC_CTYPE locale is in POSIX_LOCALES."""
        loc = locale.setlocale(locale.LC_CTYPE, None)
        return (loc in POSIX_LOCALES)

    def get_output(self, *args, failure=False, **kw):
        """Run a child Python with *args* and return its decoded output.

        Keyword arguments are merged over DEFAULT_ENV and forwarded to the
        script helper. If *failure* is false, assert_python_ok() is used
        and item 1 of its result (stdout) is returned; otherwise
        assert_python_failure() is used and item 2 (stderr) is returned.
        Trailing newline and carriage return characters are stripped.
        """
        kw = dict(self.DEFAULT_ENV, **kw)
        if failure:
            out = assert_python_failure(*args, **kw)
            out = out[2]
        else:
            out = assert_python_ok(*args, **kw)
            out = out[1]
        return out.decode().rstrip("\n\r")

    # The POSIX locale is expected to enable the UTF-8 Mode.
    @unittest.skipIf(MS_WINDOWS, 'Windows has no POSIX locale')
    def test_posix_locale(self):
        code = 'import sys; print(sys.flags.utf8_mode)'

        for loc in POSIX_LOCALES:
            with self.subTest(LC_ALL=loc):
                out = self.get_output('-c', code, LC_ALL=loc)
                self.assertEqual(out, '1')

    # Check sys.flags.utf8_mode for the -X utf8 command line option.
    def test_xoption(self):
        code = 'import sys; print(sys.flags.utf8_mode)'

        out = self.get_output('-X', 'utf8', '-c', code)
        self.assertEqual(out, '1')

        # undocumented but accepted syntax: -X utf8=1
        out = self.get_output('-X', 'utf8=1', '-c', code)
        self.assertEqual(out, '1')

        out = self.get_output('-X', 'utf8=0', '-c', code)
        self.assertEqual(out, '0')

        if MS_WINDOWS:
            # PYTHONLEGACYWINDOWSFSENCODING disables the UTF-8 Mode
            # and has the priority over -X utf8
            out = self.get_output('-X', 'utf8', '-c', code,
                                  PYTHONLEGACYWINDOWSFSENCODING='1')
            self.assertEqual(out, '0')

    # Check sys.flags.utf8_mode for the PYTHONUTF8 environment variable.
    def test_env_var(self):
        code = 'import sys; print(sys.flags.utf8_mode)'

        out = self.get_output('-c', code, PYTHONUTF8='1')
        self.assertEqual(out, '1')

        out = self.get_output('-c', code, PYTHONUTF8='0')
        self.assertEqual(out, '0')

        # -X utf8 has the priority over PYTHONUTF8
        out = self.get_output('-X', 'utf8=0', '-c', code, PYTHONUTF8='1')
        self.assertEqual(out, '0')

        if MS_WINDOWS:
            # PYTHONLEGACYWINDOWSFSENCODING disables the UTF-8 mode
            # and has the priority over PYTHONUTF8
            out = self.get_output('-X', 'utf8', '-c', code, PYTHONUTF8='1',
                                  PYTHONLEGACYWINDOWSFSENCODING='1')
            self.assertEqual(out, '0')

        # Cannot test with the POSIX locale, since the POSIX locale enables
        # the UTF-8 mode
        if not self.posix_locale():
            # PYTHONUTF8 should be ignored if -E is used
            out = self.get_output('-E', '-c', code, PYTHONUTF8='1')
            self.assertEqual(out, '0')

        # invalid mode
        out = self.get_output('-c', code, PYTHONUTF8='xxx', failure=True)
        self.assertIn('invalid PYTHONUTF8 environment variable value',
                      out.rstrip())

    # Check the filesystem encoding and error handler under -X utf8.
    def test_filesystemencoding(self):
        code = textwrap.dedent('''
            import sys
            print("{}/{}".format(sys.getfilesystemencoding(),
                                 sys.getfilesystemencodeerrors()))
        ''')

        if MS_WINDOWS:
            expected = 'utf-8/surrogatepass'
        else:
            expected = 'utf-8/surrogateescape'

        out = self.get_output('-X', 'utf8', '-c', code)
        self.assertEqual(out, expected)

        if MS_WINDOWS:
            # PYTHONLEGACYWINDOWSFSENCODING disables the UTF-8 mode
            # and has the priority over -X utf8 and PYTHONUTF8
            out = self.get_output('-X', 'utf8', '-c', code,
                                  PYTHONUTF8='strict',
                                  PYTHONLEGACYWINDOWSFSENCODING='1')
            self.assertEqual(out, 'mbcs/replace')

    # Check the encoding and error handler of the three standard streams.
    def test_stdio(self):
        code = textwrap.dedent('''
            import sys
            print(f"stdin: {sys.stdin.encoding}/{sys.stdin.errors}")
            print(f"stdout: {sys.stdout.encoding}/{sys.stdout.errors}")
            print(f"stderr: {sys.stderr.encoding}/{sys.stderr.errors}")
        ''')

        out = self.get_output('-X', 'utf8', '-c', code,
                              PYTHONIOENCODING='')
        self.assertEqual(out.splitlines(),
                         ['stdin: utf-8/surrogateescape',
                          'stdout: utf-8/surrogateescape',
                          'stderr: utf-8/backslashreplace'])

        # PYTHONIOENCODING has the priority over PYTHONUTF8
        out = self.get_output('-X', 'utf8', '-c', code,
                              PYTHONIOENCODING="latin1")
        self.assertEqual(out.splitlines(),
                         ['stdin: iso8859-1/strict',
                          'stdout: iso8859-1/strict',
                          'stderr: iso8859-1/backslashreplace'])

        out = self.get_output('-X', 'utf8', '-c', code,
                              PYTHONIOENCODING=":namereplace")
        self.assertEqual(out.splitlines(),
                         ['stdin: utf-8/namereplace',
                          'stdout: utf-8/namereplace',
                          'stderr: utf-8/backslashreplace'])

    # Check the default encoding/errors of the builtin open() when
    # PYTHONUTF8=1 is set.
    def test_io(self):
        code = textwrap.dedent('''
            import sys
            filename = sys.argv[1]
            with open(filename) as fp:
                print(f"{fp.encoding}/{fp.errors}")
        ''')
        filename = __file__

        out = self.get_output('-c', code, filename, PYTHONUTF8='1')
        self.assertEqual(out, 'UTF-8/strict')

    def _check_io_encoding(self, module, encoding=None, errors=None):
        """Check that explicit open() arguments are honoured with PYTHONUTF8=1.

        The child imports open() from *module* and opens this test file,
        passing *encoding* and/or *errors* when they are given. The file
        object must report the given values; 'UTF-8' and 'strict' are
        expected for the arguments left out.
        """
        filename = __file__

        # Encoding explicitly set
        args = []
        if encoding:
            args.append(f'encoding={encoding!r}')
        if errors:
            args.append(f'errors={errors!r}')
        code = textwrap.dedent('''
            import sys
            from %s import open
            filename = sys.argv[1]
            with open(filename, %s) as fp:
                print(f"{fp.encoding}/{fp.errors}")
        ''') % (module, ', '.join(args))
        out = self.get_output('-c', code, filename,
                              PYTHONUTF8='1')

        if not encoding:
            encoding = 'UTF-8'
        if not errors:
            errors = 'strict'
        self.assertEqual(out, f'{encoding}/{errors}')

    def check_io_encoding(self, module):
        """Run _check_io_encoding() with encoding only, errors only, and both."""
        self._check_io_encoding(module, encoding="latin1")
        self._check_io_encoding(module, errors="namereplace")
        self._check_io_encoding(module,
                                encoding="latin1", errors="namereplace")

    def test_io_encoding(self):
        self.check_io_encoding('io')

    def test_pyio_encoding(self):
        self.check_io_encoding('_pyio')

    # locale.getpreferredencoding() must report UTF-8 under -X utf8, also
    # when LC_ALL selects one of the POSIX locales.
    def test_locale_getpreferredencoding(self):
        code = 'import locale; print(locale.getpreferredencoding(False), locale.getpreferredencoding(True))'
        out = self.get_output('-X', 'utf8', '-c', code)
        self.assertEqual(out, 'UTF-8 UTF-8')

        for loc in POSIX_LOCALES:
            with self.subTest(LC_ALL=loc):
                out = self.get_output('-X', 'utf8', '-c', code, LC_ALL=loc)
                self.assertEqual(out, 'UTF-8 UTF-8')

    # Check how a non-ASCII command line argument (passed as UTF-8 bytes)
    # is decoded into sys.argv.
    @unittest.skipIf(MS_WINDOWS, 'test specific to Unix')
    def test_cmd_line(self):
        arg = 'h\xe9\u20ac'.encode('utf-8')
        arg_utf8 = arg.decode('utf-8')
        arg_ascii = arg.decode('ascii', 'surrogateescape')
        code = 'import locale, sys; print("%s:%s" % (locale.getpreferredencoding(), ascii(sys.argv[1:])))'

        def check(utf8_opt, expected, **kw):
            # The child prints "<encoding>:<ascii(argv)>"; only the part
            # after the first ':' is compared, the full output is used as
            # the failure message.
            out = self.get_output('-X', utf8_opt, '-c', code, arg, **kw)
            args = out.partition(':')[2].rstrip()
            self.assertEqual(args, ascii(expected), out)

        check('utf8', [arg_utf8])
        for loc in POSIX_LOCALES:
            with self.subTest(LC_ALL=loc):
                check('utf8', [arg_utf8], LC_ALL=loc)

        # Expected decoding with the UTF-8 Mode disabled depends on the
        # platform.
        if sys.platform == 'darwin' or support.is_android or VXWORKS:
            c_arg = arg_utf8
        elif sys.platform.startswith("aix"):
            c_arg = arg.decode('iso-8859-1')
        else:
            c_arg = arg_ascii
        for loc in POSIX_LOCALES:
            with self.subTest(LC_ALL=loc):
                check('utf8=0', [c_arg], LC_ALL=loc)

    def test_optim_level(self):
        # CPython: check that Py_Main() doesn't increment Py_OptimizeFlag
        # twice when -X utf8 requires to parse the configuration twice (when
        # the encoding changes after reading the configuration, the
        # configuration is read again with the new encoding).
        code = 'import sys; print(sys.flags.optimize)'
        out = self.get_output('-X', 'utf8', '-O', '-c', code)
        self.assertEqual(out, '1')
        out = self.get_output('-X', 'utf8', '-OO', '-c', code)
        self.assertEqual(out, '2')

        code = 'import sys; print(sys.flags.ignore_environment)'
        out = self.get_output('-X', 'utf8', '-E', '-c', code)
        self.assertEqual(out, '1')
# Run the test suite when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()
# (GitHub page footer residue: "You can’t perform that action at this time.")