Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 26 additions & 14 deletions src/runtime/forkipc.c
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,23 @@ int fork_child_main(int ipc_fd,
#define LINUX_CLONE_CHILD_SETTID 0x01000000
/* LINUX_SIGCHLD defined in syscall_signal.h (included above) */

/* Namespace flags. elfuse implements no namespace isolation. Both
 * sys_clone and sys_clone3 reject them.
 *
 * The values below are the Linux CLONE_NEW* bit positions. Note that
 * CLONE_NEWTIME (0x80) falls inside the low byte of the flags word;
 * sys_clone masks that byte off before testing against the namespace
 * set, so CLONE_NEWTIME is not checked on the legacy clone path.
 */
#define LINUX_CLONE_NEWTIME 0x00000080
#define LINUX_CLONE_NEWNS 0x00020000
#define LINUX_CLONE_NEWCGROUP 0x02000000
#define LINUX_CLONE_NEWUTS 0x04000000
#define LINUX_CLONE_NEWIPC 0x08000000
#define LINUX_CLONE_NEWUSER 0x10000000
#define LINUX_CLONE_NEWPID 0x20000000
#define LINUX_CLONE_NEWNET 0x40000000

/* Union of every namespace flag defined above. Despite the CLONE3 in the
 * name, sys_clone also tests its flags argument against this mask.
 */
#define LINUX_CLONE3_NS_FLAGS \
(LINUX_CLONE_NEWNS | LINUX_CLONE_NEWCGROUP | LINUX_CLONE_NEWUTS | \
LINUX_CLONE_NEWIPC | LINUX_CLONE_NEWUSER | LINUX_CLONE_NEWPID | \
LINUX_CLONE_NEWNET | LINUX_CLONE_NEWTIME)

/* CLONE_THREAD: create a new guest thread in the same VM. */

/* Arguments passed to the worker pthread. Allocated by sys_clone_thread, freed
Expand Down Expand Up @@ -1069,6 +1086,14 @@ int64_t sys_clone(hv_vcpu_t vcpu,
uint64_t ctid_gva,
bool verbose)
{
/* Namespaces are not implemented. CLONE_NEWTIME (0x80) lives in the CSIGNAL
* low byte and, like CLONE_INTO_CGROUP (bit 33) and set_tid, cannot be
* conveyed through clone(2) at all, so only the higher namespace bits are
* reachable here.
*/
if ((flags & ~(uint64_t) 0xff) & LINUX_CLONE3_NS_FLAGS)
return -LINUX_EINVAL;

/* CLONE_THREAD: create a new thread in the same VM (not a new process) */
if (flags & LINUX_CLONE_THREAD) {
return sys_clone_thread(vcpu, g, flags, child_stack, stack_map_start,
Expand Down Expand Up @@ -1507,22 +1532,9 @@ struct linux_clone_args {

#define CLONE_ARGS_SIZE_VER0 64 /* v5.3: first 8 fields (flags..tls) */

/* Unsupported clone3 flags: reject early rather than silently ignoring. */
/* Unsupported clone3-only flags: reject early rather than silently ignoring. */
#define LINUX_CLONE_PIDFD 0x00001000
#define LINUX_CLONE_INTO_CGROUP 0x200000000ULL
#define LINUX_CLONE_NEWNS 0x00020000
#define LINUX_CLONE_NEWCGROUP 0x02000000
#define LINUX_CLONE_NEWUTS 0x04000000
#define LINUX_CLONE_NEWIPC 0x08000000
#define LINUX_CLONE_NEWUSER 0x10000000
#define LINUX_CLONE_NEWPID 0x20000000
#define LINUX_CLONE_NEWNET 0x40000000
#define LINUX_CLONE_NEWTIME 0x00000080

#define LINUX_CLONE3_NS_FLAGS \
(LINUX_CLONE_NEWNS | LINUX_CLONE_NEWCGROUP | LINUX_CLONE_NEWUTS | \
LINUX_CLONE_NEWIPC | LINUX_CLONE_NEWUSER | LINUX_CLONE_NEWPID | \
LINUX_CLONE_NEWNET | LINUX_CLONE_NEWTIME)

int64_t sys_clone3(hv_vcpu_t vcpu,
guest_t *g,
Expand Down
30 changes: 30 additions & 0 deletions tests/test-clone3.c
Original file line number Diff line number Diff line change
Expand Up @@ -658,6 +658,35 @@ static void test_partial_deferred_stack_munmap(void)
munmap(reuse_stack, stack_size);
}

/* Test 15: every CLONE_NEW* namespace flag passed to legacy clone(2) must be
 * refused with EINVAL, the same answer clone3 gives (issue #44). Prior to the
 * fix such a request degraded into an ordinary fork that looked successful.
 * CLONE_NEWTIME is deliberately absent from the table: its bit sits in the
 * CSIGNAL low byte, so it cannot be expressed through clone(2).
 */
static void test_legacy_clone_namespaces(void)
{
    /* One row per namespace flag: raw bit value plus its printable name. */
    static const struct ns_case {
        unsigned long flag;
        const char *name;
    } cases[] = {
        {0x00020000, "CLONE_NEWNS"},
        {0x02000000, "CLONE_NEWCGROUP"},
        {0x04000000, "CLONE_NEWUTS"},
        {0x08000000, "CLONE_NEWIPC"},
        {0x10000000, "CLONE_NEWUSER"},
        {0x20000000, "CLONE_NEWPID"},
        {0x40000000, "CLONE_NEWNET"},
    };
    const struct ns_case *const end = cases + sizeof(cases) / sizeof(cases[0]);

    for (const struct ns_case *entry = cases; entry != end; entry++) {
        /* OR-ing in SIGCHLD (17) as the exit signal gives a fork-style
         * clone request.
         */
        long ret = raw_clone(entry->flag | 17, NULL, NULL, 0, NULL);
        CHECK(ret == -22 /* EINVAL */,
              "clone(%s) returned %ld (expected -EINVAL)", entry->name, ret);

        if (ret > 0) {
            /* A child was unexpectedly created: reap it in the parent. */
            raw_syscall4(__NR_wait4, ret, 0, 0, 0);
        } else if (ret == 0) {
            /* We are that unexpected child: leave before running the
             * remainder of the suite.
             */
            raw_syscall1(__NR_exit, 0);
        }
    }
}

int main(int argc, char **argv)
{
if (argc > 1 && !strcmp(argv[1], "--clone3-vfork-child"))
Expand Down Expand Up @@ -687,6 +716,7 @@ int main(int argc, char **argv)
test_vfork_exec_unblocks_parent();
test_deferred_stack_munmap();
test_partial_deferred_stack_munmap();
test_legacy_clone_namespaces();

SUMMARY("test-clone3");
return fails > 0 ? 1 : 0;
Expand Down
Loading