diff --git a/Include/internal/pycore_fileutils.h b/Include/internal/pycore_fileutils.h index 3ce8108e4e04f1..332cc30365b9e2 100644 --- a/Include/internal/pycore_fileutils.h +++ b/Include/internal/pycore_fileutils.h @@ -244,7 +244,8 @@ extern int _Py_add_relfile(wchar_t *dirname, const wchar_t *relfile, size_t bufsize); extern size_t _Py_find_basename(const wchar_t *filename); -PyAPI_FUNC(wchar_t *) _Py_normpath(wchar_t *path, Py_ssize_t size); +PyAPI_FUNC(wchar_t*) _Py_normpath(wchar_t *path, Py_ssize_t size); +extern wchar_t *_Py_normpath_and_size(wchar_t *path, Py_ssize_t size, Py_ssize_t *length); // Macros to protect CRT calls against instant termination when passed an diff --git a/Lib/test/test_genericpath.py b/Lib/test/test_genericpath.py index 489044f8090d3b..4f311c2d498e9f 100644 --- a/Lib/test/test_genericpath.py +++ b/Lib/test/test_genericpath.py @@ -460,6 +460,10 @@ def test_normpath_issue5827(self): for path in ('', '.', '/', '\\', '///foo/.//bar//'): self.assertIsInstance(self.pathmodule.normpath(path), str) + def test_normpath_issue106242(self): + for path in ('\x00', 'foo\x00bar', '\x00\x00', '\x00foo', 'foo\x00'): + self.assertEqual(self.pathmodule.normpath(path), path) + def test_abspath_issue3426(self): # Check that abspath returns unicode when the arg is unicode # with both ASCII and non-ASCII cwds. diff --git a/Misc/NEWS.d/next/Library/2023-08-14-23-11-11.gh-issue-106242.71HMym.rst b/Misc/NEWS.d/next/Library/2023-08-14-23-11-11.gh-issue-106242.71HMym.rst new file mode 100644 index 00000000000000..44237a9f15708c --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-08-14-23-11-11.gh-issue-106242.71HMym.rst @@ -0,0 +1 @@ +Fixes :func:`os.path.normpath` to handle embedded null characters without truncating the path. diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 9d4228896d230e..5ceea7411818a4 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -4552,7 +4552,9 @@ os__path_normpath_impl(PyObject *module, PyObject *path) if (!buffer) { return NULL; } - PyObject *result = PyUnicode_FromWideChar(_Py_normpath(buffer, len), -1); + Py_ssize_t norm_len; + wchar_t *norm_path = _Py_normpath_and_size(buffer, len, &norm_len); + PyObject *result = PyUnicode_FromWideChar(norm_path, norm_len); PyMem_Free(buffer); return result; } diff --git a/Python/fileutils.c b/Python/fileutils.c index c86ed40b19937e..b310a6c077d91e 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -2179,12 +2179,14 @@ _Py_find_basename(const wchar_t *filename) path, which will be within the original buffer. Guaranteed to not make the path longer, and will not fail. 'size' is the length of the path, if known. If -1, the first null character will be assumed - to be the end of the path. */ + to be the end of the path. 'normsize' will be set to contain the + length of the resulting normalized path. */ wchar_t * -_Py_normpath(wchar_t *path, Py_ssize_t size) +_Py_normpath_and_size(wchar_t *path, Py_ssize_t size, Py_ssize_t *normsize) { assert(path != NULL); - if (!path[0] || size == 0) { + if ((size < 0 && !path[0]) || size == 0) { + *normsize = 0; return path; } wchar_t *pEnd = size >= 0 ? &path[size] : NULL; @@ -2233,11 +2235,7 @@ _Py_normpath(wchar_t *path, Py_ssize_t size) *p2++ = lastC = *p1; } } - if (sepCount) { - minP2 = p2; // Invalid path - } else { - minP2 = p2 - 1; // Absolute path has SEP at minP2 - } + minP2 = p2 - 1; } #else // Skip past two leading SEPs @@ -2297,13 +2295,28 @@ _Py_normpath(wchar_t *path, Py_ssize_t size) while (--p2 != minP2 && *p2 == SEP) { *p2 = L'\0'; } + } else { + --p2; } + *normsize = p2 - path + 1; #undef SEP_OR_END #undef IS_SEP #undef IS_END return path; } +/* In-place path normalisation. Returns the start of the normalized + path, which will be within the original buffer. Guaranteed to not + make the path longer, and will not fail. 'size' is the length of + the path, if known. If -1, the first null character will be assumed + to be the end of the path. */ +wchar_t * +_Py_normpath(wchar_t *path, Py_ssize_t size) +{ + Py_ssize_t norm_length; + return _Py_normpath_and_size(path, size, &norm_length); +} + /* Get the current directory. buflen is the buffer size in wide characters including the null character. Decode the path from the locale encoding.