Skip to content

Commit 9e4994d

Browse files
authored
bpo-34485: Enhance init_sys_streams() (GH-8978)
Python now gets the locale encoding with C code to initialize the encoding of standard streams like sys.stdout. Moreover, the encoding is now initialized to the Python codec name to get a normalized encoding name and to ensure that the codec is loaded. The change avoids importing _bootlocale and _locale modules at startup by default. When the PYTHONIOENCODING environment variable only contains an encoding, the error handler is now set explicitly to "strict". Also rename get_default_standard_stream_error_handler() to get_stdio_errors(). Reduce the size of the buffer used to format the "cpXXX" string (Windows locale encoding).
1 parent d500e53 commit 9e4994d

File tree

8 files changed

+91
-43
lines changed

8 files changed

+91
-43
lines changed

Lib/test/test_embed.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -171,17 +171,17 @@ def test_forced_io_encoding(self):
171171
"stdout: {out_encoding}:ignore",
172172
"stderr: {out_encoding}:backslashreplace",
173173
"--- Set encoding only ---",
174-
"Expected encoding: latin-1",
174+
"Expected encoding: iso8859-1",
175175
"Expected errors: default",
176-
"stdin: latin-1:{errors}",
177-
"stdout: latin-1:{errors}",
178-
"stderr: latin-1:backslashreplace",
176+
"stdin: iso8859-1:{errors}",
177+
"stdout: iso8859-1:{errors}",
178+
"stderr: iso8859-1:backslashreplace",
179179
"--- Set encoding and errors ---",
180-
"Expected encoding: latin-1",
180+
"Expected encoding: iso8859-1",
181181
"Expected errors: replace",
182-
"stdin: latin-1:replace",
183-
"stdout: latin-1:replace",
184-
"stderr: latin-1:backslashreplace"])
182+
"stdin: iso8859-1:replace",
183+
"stdout: iso8859-1:replace",
184+
"stderr: iso8859-1:backslashreplace"])
185185
expected_output = expected_output.format(
186186
in_encoding=expected_stream_encoding,
187187
out_encoding=expected_stream_encoding,

Lib/test/test_sys.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -668,7 +668,7 @@ def c_locale_get_error_handler(self, isolated=False, encoding=None):
668668
'dump("stdout")',
669669
'dump("stderr")',
670670
))
671-
args = [sys.executable, "-c", code]
671+
args = [sys.executable, "-X", "utf8=0", "-c", code]
672672
if isolated:
673673
args.append("-I")
674674
if encoding is not None:
@@ -712,8 +712,8 @@ def test_c_locale_surrogateescape(self):
712712
# have no any effect
713713
out = self.c_locale_get_error_handler(encoding=':')
714714
self.assertEqual(out,
715-
'stdin: strict\n'
716-
'stdout: strict\n'
715+
'stdin: surrogateescape\n'
716+
'stdout: surrogateescape\n'
717717
'stderr: backslashreplace\n')
718718
out = self.c_locale_get_error_handler(encoding='')
719719
self.assertEqual(out,

Lib/test/test_utf8_mode.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -139,16 +139,16 @@ def test_stdio(self):
139139
out = self.get_output('-X', 'utf8', '-c', code,
140140
PYTHONIOENCODING="latin1")
141141
self.assertEqual(out.splitlines(),
142-
['stdin: latin1/strict',
143-
'stdout: latin1/strict',
144-
'stderr: latin1/backslashreplace'])
142+
['stdin: iso8859-1/strict',
143+
'stdout: iso8859-1/strict',
144+
'stderr: iso8859-1/backslashreplace'])
145145

146146
out = self.get_output('-X', 'utf8', '-c', code,
147147
PYTHONIOENCODING=":namereplace")
148148
self.assertEqual(out.splitlines(),
149-
['stdin: UTF-8/namereplace',
150-
'stdout: UTF-8/namereplace',
151-
'stderr: UTF-8/backslashreplace'])
149+
['stdin: utf-8/namereplace',
150+
'stdout: utf-8/namereplace',
151+
'stderr: utf-8/backslashreplace'])
152152

153153
def test_io(self):
154154
code = textwrap.dedent('''
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Python now gets the locale encoding with C code to initialize the encoding
2+
of standard streams like sys.stdout. Moreover, the encoding is now
3+
initialized to the Python codec name to get a normalized encoding name and
4+
to ensure that the codec is loaded. The change avoids importing _bootlocale
5+
and _locale modules at startup by default.
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Fix the error handler of standard streams like sys.stdout:
2+
PYTHONIOENCODING=":" is now ignored instead of setting the error handler to
3+
"strict".

Modules/_localemodule.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -319,7 +319,7 @@ PyLocale_strxfrm(PyObject* self, PyObject* args)
319319
static PyObject*
320320
PyLocale_getdefaultlocale(PyObject* self, PyObject *Py_UNUSED(ignored))
321321
{
322-
char encoding[100];
322+
char encoding[20];
323323
char locale[100];
324324

325325
PyOS_snprintf(encoding, sizeof(encoding), "cp%d", GetACP());

Programs/_testembed.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -113,9 +113,9 @@ static int test_forced_io_encoding(void)
113113
printf("--- Set errors only ---\n");
114114
check_stdio_details(NULL, "ignore");
115115
printf("--- Set encoding only ---\n");
116-
check_stdio_details("latin-1", NULL);
116+
check_stdio_details("iso8859-1", NULL);
117117
printf("--- Set encoding and errors ---\n");
118-
check_stdio_details("latin-1", "replace");
118+
check_stdio_details("iso8859-1", "replace");
119119

120120
/* Check calling after initialization fails */
121121
Py_Initialize();

Python/pylifecycle.c

Lines changed: 63 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -244,22 +244,26 @@ get_codec_name(const char *encoding)
244244
return NULL;
245245
}
246246

247-
static char*
248-
get_locale_encoding(void)
247+
static _PyInitError
248+
get_locale_encoding(char **locale_encoding)
249249
{
250-
#if defined(HAVE_LANGINFO_H) && defined(CODESET)
251-
char* codeset = nl_langinfo(CODESET);
252-
if (!codeset || codeset[0] == '\0') {
253-
PyErr_SetString(PyExc_ValueError, "CODESET is not set or empty");
254-
return NULL;
255-
}
256-
return get_codec_name(codeset);
250+
#ifdef MS_WINDOWS
251+
char encoding[20];
252+
PyOS_snprintf(encoding, sizeof(encoding), "cp%d", GetACP());
257253
#elif defined(__ANDROID__)
258-
return get_codec_name("UTF-8");
254+
const char *encoding = "UTF-8";
259255
#else
260-
PyErr_SetNone(PyExc_NotImplementedError);
261-
return NULL;
256+
const char *encoding = nl_langinfo(CODESET);
257+
if (!encoding || encoding[0] == '\0') {
258+
return _Py_INIT_USER_ERR("failed to get the locale encoding: "
259+
"nl_langinfo(CODESET) failed");
260+
}
262261
#endif
262+
*locale_encoding = _PyMem_RawStrdup(encoding);
263+
if (*locale_encoding == NULL) {
264+
return _Py_INIT_NO_MEMORY();
265+
}
266+
return _Py_INIT_OK();
263267
}
264268

265269
static _PyInitError
@@ -397,7 +401,7 @@ static _LocaleCoercionTarget _TARGET_LOCALES[] = {
397401
};
398402

399403
static const char *
400-
get_default_standard_stream_error_handler(void)
404+
get_stdio_errors(void)
401405
{
402406
const char *ctype_loc = setlocale(LC_CTYPE, NULL);
403407
if (ctype_loc != NULL) {
@@ -417,8 +421,7 @@ get_default_standard_stream_error_handler(void)
417421
#endif
418422
}
419423

420-
/* Otherwise return NULL to request the typical default error handler */
421-
return NULL;
424+
return "strict";
422425
}
423426

424427
#ifdef PY_COERCE_C_LOCALE
@@ -1586,9 +1589,17 @@ initfsencoding(PyInterpreterState *interp)
15861589
Py_HasFileSystemDefaultEncoding = 1;
15871590
}
15881591
else {
1589-
Py_FileSystemDefaultEncoding = get_locale_encoding();
1592+
char *locale_encoding;
1593+
_PyInitError err = get_locale_encoding(&locale_encoding);
1594+
if (_Py_INIT_FAILED(err)) {
1595+
return err;
1596+
}
1597+
1598+
Py_FileSystemDefaultEncoding = get_codec_name(locale_encoding);
1599+
PyMem_RawFree(locale_encoding);
15901600
if (Py_FileSystemDefaultEncoding == NULL) {
1591-
return _Py_INIT_ERR("Unable to get the locale encoding");
1601+
return _Py_INIT_ERR("failed to get the Python codec "
1602+
"of the locale encoding");
15921603
}
15931604

15941605
Py_HasFileSystemDefaultEncoding = 0;
@@ -1787,6 +1798,8 @@ init_sys_streams(PyInterpreterState *interp)
17871798
PyObject * encoding_attr;
17881799
char *pythonioencoding = NULL;
17891800
const char *encoding, *errors;
1801+
char *locale_encoding = NULL;
1802+
char *codec_name = NULL;
17901803
_PyInitError res = _Py_INIT_OK();
17911804

17921805
/* Hack to avoid a nasty recursion issue when Python is invoked
@@ -1838,21 +1851,46 @@ init_sys_streams(PyInterpreterState *interp)
18381851
errors = err;
18391852
}
18401853
}
1841-
if (*pythonioencoding && !encoding) {
1854+
if (!encoding && *pythonioencoding) {
18421855
encoding = pythonioencoding;
1856+
if (!errors) {
1857+
errors = "strict";
1858+
}
18431859
}
18441860
}
1845-
else if (interp->core_config.utf8_mode) {
1846-
encoding = "utf-8";
1847-
errors = "surrogateescape";
1861+
1862+
if (interp->core_config.utf8_mode) {
1863+
if (!encoding) {
1864+
encoding = "utf-8";
1865+
}
1866+
if (!errors) {
1867+
errors = "surrogateescape";
1868+
}
18481869
}
18491870

1850-
if (!errors && !pythonioencoding) {
1871+
if (!errors) {
18511872
/* Choose the default error handler based on the current locale */
1852-
errors = get_default_standard_stream_error_handler();
1873+
errors = get_stdio_errors();
18531874
}
18541875
}
18551876

1877+
if (encoding == NULL) {
1878+
_PyInitError err = get_locale_encoding(&locale_encoding);
1879+
if (_Py_INIT_FAILED(err)) {
1880+
return err;
1881+
}
1882+
encoding = locale_encoding;
1883+
}
1884+
1885+
codec_name = get_codec_name(encoding);
1886+
if (codec_name == NULL) {
1887+
PyErr_SetString(PyExc_RuntimeError,
1888+
"failed to get the Python codec name "
1889+
"of stdio encoding");
1890+
goto error;
1891+
}
1892+
encoding = codec_name;
1893+
18561894
/* Set sys.stdin */
18571895
fd = fileno(stdin);
18581896
/* Under some conditions stdin, stdout and stderr may not be connected
@@ -1928,6 +1966,8 @@ init_sys_streams(PyInterpreterState *interp)
19281966

19291967
PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &old_alloc);
19301968

1969+
PyMem_RawFree(locale_encoding);
1970+
PyMem_RawFree(codec_name);
19311971
PyMem_Free(pythonioencoding);
19321972
Py_XDECREF(bimod);
19331973
Py_XDECREF(iomod);

0 commit comments

Comments
 (0)