Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[3.11] gh-106242: Fix path truncation in os.path.normpath (GH-106816) #107982

Merged
merged 2 commits into from
Aug 15, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion Include/internal/pycore_fileutils.h
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,8 @@ extern int _Py_add_relfile(wchar_t *dirname,
const wchar_t *relfile,
size_t bufsize);
extern size_t _Py_find_basename(const wchar_t *filename);
PyAPI_FUNC(wchar_t *) _Py_normpath(wchar_t *path, Py_ssize_t size);
PyAPI_FUNC(wchar_t*) _Py_normpath(wchar_t *path, Py_ssize_t size);
extern wchar_t *_Py_normpath_and_size(wchar_t *path, Py_ssize_t size, Py_ssize_t *length);


// Macros to protect CRT calls against instant termination when passed an
Expand Down
4 changes: 4 additions & 0 deletions Lib/test/test_genericpath.py
Original file line number Diff line number Diff line change
Expand Up @@ -460,6 +460,10 @@ def test_normpath_issue5827(self):
for path in ('', '.', '/', '\\', '///foo/.//bar//'):
self.assertIsInstance(self.pathmodule.normpath(path), str)

def test_normpath_issue106242(self):
# gh-106242: normpath() must not truncate a path at an embedded null
# character.  Each sample places '\x00' at a different position (alone,
# in the middle, doubled, leading, trailing) and is expected to come
# back from normpath() unchanged.
for path in ('\x00', 'foo\x00bar', '\x00\x00', '\x00foo', 'foo\x00'):
self.assertEqual(self.pathmodule.normpath(path), path)

def test_abspath_issue3426(self):
# Check that abspath returns unicode when the arg is unicode
# with both ASCII and non-ASCII cwds.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fixes :func:`os.path.normpath` to handle embedded null characters without truncating the path.
4 changes: 3 additions & 1 deletion Modules/posixmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -4552,7 +4552,9 @@ os__path_normpath_impl(PyObject *module, PyObject *path)
if (!buffer) {
return NULL;
}
PyObject *result = PyUnicode_FromWideChar(_Py_normpath(buffer, len), -1);
Py_ssize_t norm_len;
wchar_t *norm_path = _Py_normpath_and_size(buffer, len, &norm_len);
PyObject *result = PyUnicode_FromWideChar(norm_path, norm_len);
PyMem_Free(buffer);
return result;
}
Expand Down
29 changes: 21 additions & 8 deletions Python/fileutils.c
Original file line number Diff line number Diff line change
Expand Up @@ -2179,12 +2179,14 @@
path, which will be within the original buffer. Guaranteed to not
make the path longer, and will not fail. 'size' is the length of
the path, if known. If -1, the first null character will be assumed
to be the end of the path. */
to be the end of the path. 'normsize' will be set to contain the
length of the resulting normalized path. */
wchar_t *
_Py_normpath(wchar_t *path, Py_ssize_t size)
_Py_normpath_and_size(wchar_t *path, Py_ssize_t size, Py_ssize_t *normsize)
{
assert(path != NULL);
if (!path[0] || size == 0) {
if (!path[0] && size < 0 || size == 0) {

Check warning on line 2188 in Python/fileutils.c

View workflow job for this annotation

GitHub Actions / Address sanitizer

suggest parentheses around ‘&&’ within ‘||’ [-Wparentheses]

Check warning on line 2188 in Python/fileutils.c

View workflow job for this annotation

GitHub Actions / Ubuntu

suggest parentheses around ‘&&’ within ‘||’ [-Wparentheses]
*normsize = 0;
return path;
}
wchar_t *pEnd = size >= 0 ? &path[size] : NULL;
Expand Down Expand Up @@ -2233,11 +2235,7 @@
*p2++ = lastC = *p1;
}
}
if (sepCount) {
minP2 = p2; // Invalid path
} else {
minP2 = p2 - 1; // Absolute path has SEP at minP2
}
minP2 = p2 - 1;
}
#else
// Skip past two leading SEPs
Expand Down Expand Up @@ -2297,13 +2295,28 @@
while (--p2 != minP2 && *p2 == SEP) {
*p2 = L'\0';
}
} else {
--p2;
}
*normsize = p2 - path + 1;
#undef SEP_OR_END
#undef IS_SEP
#undef IS_END
return path;
}

/* Normalise 'path' in place and return the start of the normalized path.

   Thin wrapper around _Py_normpath_and_size() for callers that do not
   need the length of the result; the length is computed and discarded.
   The returned pointer lies within the original buffer.  The normalized
   path is never longer than the input, and the call will not fail.

   'size' is the length of the path, if known.  If it is -1, the first
   null character is assumed to be the end of the path. */
wchar_t *
_Py_normpath(wchar_t *path, Py_ssize_t size)
{
    Py_ssize_t discarded_length;  // required out-parameter; value unused
    wchar_t *normalized = _Py_normpath_and_size(path, size, &discarded_length);
    return normalized;
}


/* Get the current directory. buflen is the buffer size in wide characters
including the null character. Decode the path from the locale encoding.
Expand Down
Loading