Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Prefer ``close_range()`` to iterating over procfs for file descriptor
closing in :mod:`subprocess` for better performance.
88 changes: 70 additions & 18 deletions Modules/_posixsubprocess.c
Original file line number Diff line number Diff line change
Expand Up @@ -210,18 +210,23 @@ safe_get_max_fd(void)
}


/* Close all file descriptors in the range from start_fd and higher
* except for those in py_fds_to_keep. If the range defined by
* [start_fd, safe_get_max_fd()) is large this will take a long
* time as it calls close() on EVERY possible fd.
/* Close all file descriptors in the given range except for those in
* py_fds_to_keep by invoking closer on each subrange.
*
* It isn't possible to know for sure what the max fd to go up to
* is for processes with the capability of raising their maximum.
* If end_fd == -1, it's guessed via safe_get_max_fd(), but it isn't
* possible to know for sure what the max fd to go up to is for
* processes with the capability of raising their maximum, or in case
* a process opened a high fd and then lowered its maximum.
*/
static void
_close_fds_by_brute_force(long start_fd, PyObject *py_fds_to_keep)
static int
_close_range_except(int start_fd,
int end_fd,
PyObject *py_fds_to_keep,
int (*closer)(int, int))
{
long end_fd = safe_get_max_fd();
if (end_fd == -1) {
end_fd = Py_MIN(safe_get_max_fd(), INT_MAX);
}
Py_ssize_t num_fds_to_keep = PyTuple_GET_SIZE(py_fds_to_keep);
Py_ssize_t keep_seq_idx;
/* As py_fds_to_keep is sorted we can loop through the list closing
Expand All @@ -231,15 +236,17 @@ _close_fds_by_brute_force(long start_fd, PyObject *py_fds_to_keep)
int keep_fd = PyLong_AsLong(py_keep_fd);
if (keep_fd < start_fd)
continue;
_Py_closerange(start_fd, keep_fd - 1);
if (closer(start_fd, keep_fd - 1) != 0)
return -1;
start_fd = keep_fd + 1;
}
if (start_fd <= end_fd) {
_Py_closerange(start_fd, end_fd);
if (closer(start_fd, end_fd) != 0)
return -1;
}
return 0;
}


#if defined(__linux__) && defined(HAVE_SYS_SYSCALL_H)
/* It doesn't matter if d_name has room for NAME_MAX chars; we're using this
* only to read a directory of short file descriptor number names. The kernel
Expand All @@ -255,6 +262,16 @@ struct linux_dirent64 {
char d_name[256]; /* Filename (null-terminated) */
};

static int
_brute_force_closer(int first, int last)
{
for (int i = first; i <= last; i++) {
/* Ignore errors */
(void)close(i);
}
return 0;
}

/* Close all open file descriptors in the range from start_fd and higher
* Do not close any in the sorted py_fds_to_keep list.
*
Expand All @@ -278,7 +295,7 @@ _close_open_fds_safe(int start_fd, PyObject* py_fds_to_keep)
fd_dir_fd = _Py_open_noraise(FD_DIR, O_RDONLY);
if (fd_dir_fd == -1) {
/* No way to get a list of open fds. */
_close_fds_by_brute_force(start_fd, py_fds_to_keep);
_close_range_except(start_fd, -1, py_fds_to_keep, _brute_force_closer);
return;
} else {
char buffer[sizeof(struct linux_dirent64)];
Expand Down Expand Up @@ -306,10 +323,16 @@ _close_open_fds_safe(int start_fd, PyObject* py_fds_to_keep)
}
}

#define _close_open_fds _close_open_fds_safe
#define _close_open_fds_fallback _close_open_fds_safe

#else /* NOT (defined(__linux__) && defined(HAVE_SYS_SYSCALL_H)) */

static int
_unsafe_closer(int first, int last)
{
_Py_closerange(first, last);
return 0;
}

/* Close all open file descriptors from start_fd and higher.
* Do not close any in the sorted py_fds_to_keep tuple.
Expand All @@ -325,7 +348,7 @@ _close_open_fds_safe(int start_fd, PyObject* py_fds_to_keep)
* http://womble.decadent.org.uk/readdir_r-advisory.html
*/
static void
_close_open_fds_maybe_unsafe(long start_fd, PyObject* py_fds_to_keep)
_close_open_fds_maybe_unsafe(int start_fd, PyObject* py_fds_to_keep)
{
DIR *proc_fd_dir;
#ifndef HAVE_DIRFD
Expand All @@ -348,7 +371,7 @@ _close_open_fds_maybe_unsafe(long start_fd, PyObject* py_fds_to_keep)
proc_fd_dir = opendir(FD_DIR);
if (!proc_fd_dir) {
/* No way to get a list of open fds. */
_close_fds_by_brute_force(start_fd, py_fds_to_keep);
_close_range_except(start_fd, -1, py_fds_to_keep, _unsafe_closer);
} else {
struct dirent *dir_entry;
#ifdef HAVE_DIRFD
Expand All @@ -369,16 +392,45 @@ _close_open_fds_maybe_unsafe(long start_fd, PyObject* py_fds_to_keep)
}
if (errno) {
/* readdir error, revert behavior. Highly Unlikely. */
_close_fds_by_brute_force(start_fd, py_fds_to_keep);
_close_range_except(start_fd, -1, py_fds_to_keep, _unsafe_closer);
}
closedir(proc_fd_dir);
}
}

#define _close_open_fds _close_open_fds_maybe_unsafe
#define _close_open_fds_fallback _close_open_fds_maybe_unsafe

#endif /* else NOT (defined(__linux__) && defined(HAVE_SYS_SYSCALL_H)) */

/* We can use close_range() library function only if it's known to be
* async-signal-safe.
*
* On Linux, glibc explicitly documents it to be a thin wrapper over
* the system call, and other C libraries are likely to follow glibc.
*/
#if defined(HAVE_CLOSE_RANGE) && \
(defined(__linux__) || defined(__FreeBSD__))
#define HAVE_ASYNC_SAFE_CLOSE_RANGE

static int
_close_range_closer(int first, int last)
{
return close_range(first, last, 0);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What does glibc 2.34 close_range() actually do when the running kernel does not support the close_range system call? does it return -1 with errno=ENOSYS [desirable] or does it implement its own potentially poor unsafe version of close_range? The Linux/glibc documentation fails to be explicit about this. Add a comment describing the findings to the code here for future reference. If it implements its own instead of just surfacing the syscall failure, we'd probably be better off making the syscall directly as we don't want to depend on such a glibc provided fallback when we've got our own tried and true tested efficient one.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What does glibc 2.34 close_range() actually do when the running kernel does not support the close_range system call? does it return -1 with errno=ENOSYS [desirable] or does it implement its own potentially poor unsafe version of close_range?

I'd checked that before opening the PR. The glibc manual (which is not the same as the kernel man pages project you linked to) explicitly states that close_range() doesn't provide any fallbacks. This is in contrast to closefrom(), which not only provides a procfs-based fallback, but aborts if that fallback fails, and hence would be unsuitable not only for subprocess, but for os.closerange() as well.

Glibc, however, does have a surprise: it contains a close_range() fallback for GNU Hurd, which doesn't look async-signal-safe at all, and also a generic fallback iterating over all fds up to the fd limit (for mostly theoretical non-Linux and non-Hurd builds).

Other Linux C libraries that I've checked (musl, bionic, uclibc-ng) don't currently provide close_range() wrapper, but I think they will follow glibc if/when they provide it.

Note that close_range() is also present on FreeBSD, where it's a system call too.

Overall, I think it makes sense to enable the new code for known-good platforms only. I've added extra ifdefs.

An alternative to platform whitelisting is to use a direct syscall (and assume that whenever we have SYS_close_range, the interface is what we expect).

}
#endif

static void
_close_open_fds(int start_fd, PyObject* py_fds_to_keep)
{
#ifdef HAVE_ASYNC_SAFE_CLOSE_RANGE
if (_close_range_except(
start_fd, INT_MAX, py_fds_to_keep,
_close_range_closer) == 0) {
return;
}
#endif
_close_open_fds_fallback(start_fd, py_fds_to_keep);
}

#ifdef VFORK_USABLE
/* Reset dispositions for all signals to SIG_DFL except for ignored
Expand Down