Skip to content

Commit

Permalink
core: add new bus call for migrating foreign processes to scope/s…
Browse files Browse the repository at this point in the history
…ervice units

This adds a new bus call to service and scope units called
AttachProcesses() that moves arbitrary processes into the cgroup of the
unit. The primary user for this new API is systemd itself: the systemd
--user instance uses this call of the systemd --system instance to
migrate processes if it itself gets a request to migrate processes and
the kernel refuses this due to access restrictions.

The primary use-case of this is to make "systemd-run --scope --user …"
invoked from user session scopes work correctly on pure cgroupsv2
environments. There, the kernel refuses to migrate processes between two
unprivileged-owned cgroups unless the requestor as well as the ownership
of the closest parent cgroup all match. This however is not the case
between the session-XYZ.scope unit of a login session and the
user@ABC.service of the systemd --user instance.

The new logic always tries to move the processes on its own first. If
that fails and it is running as the user manager, then the system manager
is asked to do it instead.

The new operation is relatively restrictive: it only allows moving
the processes like this if the caller is root, or the UID of the target
unit, caller and process all match. Note that this means that
unprivileged users cannot attach processes to scope units, as those do
not have "owning" users (i.e. they have no User= field).

Fixes: systemd#3388
  • Loading branch information
poettering committed Feb 9, 2018
1 parent d379862 commit dcf5e00
Show file tree
Hide file tree
Showing 10 changed files with 346 additions and 17 deletions.
145 changes: 136 additions & 9 deletions src/core/cgroup.c
Expand Up @@ -24,6 +24,7 @@
#include "alloc-util.h"
#include "blockdev-util.h"
#include "bpf-firewall.h"
#include "bus-error.h"
#include "cgroup-util.h"
#include "cgroup.h"
#include "fd-util.h"
Expand Down Expand Up @@ -1303,13 +1304,12 @@ void unit_update_cgroup_members_masks(Unit *u) {
}
}

static const char *migrate_callback(CGroupMask mask, void *userdata) {
Unit *u = userdata;
const char *unit_get_realized_cgroup_path(Unit *u, CGroupMask mask) {

assert(mask != 0);
assert(u);
/* Returns the realized cgroup path of the specified unit where all specified controllers are available. */

while (u) {

if (u->cgroup_path &&
u->cgroup_realized &&
(u->cgroup_realized_mask & mask) == mask)
Expand All @@ -1321,6 +1321,10 @@ static const char *migrate_callback(CGroupMask mask, void *userdata) {
return NULL;
}

/* Callback-shaped adapter: treats the opaque userdata pointer as the unit and forwards to
 * unit_get_realized_cgroup_path() with the arguments in that function's order. */
static const char *migrate_callback(CGroupMask mask, void *userdata) {
        Unit *unit = userdata;

        return unit_get_realized_cgroup_path(unit, mask);
}

char *unit_default_cgroup_path(Unit *u) {
_cleanup_free_ char *escaped = NULL, *slice = NULL;
int r;
Expand Down Expand Up @@ -1503,19 +1507,142 @@ static int unit_create_cgroup(
return 0;
}

int unit_attach_pids_to_cgroup(Unit *u) {
/* Asks the manager reachable via u->manager->system_bus to move one process into this unit's cgroup.
 *
 * Issues an AttachProcessesToUnit call on org.freedesktop.systemd1 with signature "ssau": a NULL (empty) unit
 * name, the unit's cgroup path relative to the manager's cgroup root with suffix_path appended, and an array
 * holding the single PID.
 *
 * Returns -EINVAL when running as the system manager, when the unit has no cgroup path, or when
 * path_startswith() finds no match against the manager's cgroup root; -EIO when there is no system bus
 * connection; a negative value when the bus call fails.
 *
 * NOTE(review): this text comes from a rendered diff without +/- markers; some lines below look like removed
 * lines of the previous unit_attach_pids_to_cgroup() that were left interleaved. They are flagged individually
 * and should be confirmed against the upstream commit. */
static int unit_attach_pid_to_cgroup_via_bus(Unit *u, pid_t pid, const char *suffix_path) {
_cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
char *pp;
int r;

assert(u);

/* NOTE(review): the result of this call is never checked before r is reassigned by the bus call further
 * down; it looks like a removed line from the old function body — confirm. */
r = unit_realize_cgroup(u);
/* This path only makes sense when we are not the system manager ourselves. */
if (MANAGER_IS_SYSTEM(u->manager))
return -EINVAL;

if (!u->manager->system_bus)
return -EIO;

if (!u->cgroup_path)
return -EINVAL;

/* Determine this unit's cgroup path relative to our cgroup root */
pp = path_startswith(u->cgroup_path, u->manager->cgroup_root);
if (!pp)
return -EINVAL;

/* Build "/<relative path><suffix_path>" and then normalize the slashes in place. */
pp = strjoina("/", pp, suffix_path);
path_kill_slashes(pp);

r = sd_bus_call_method(u->manager->system_bus,
"org.freedesktop.systemd1",
"/org/freedesktop/systemd1",
"org.freedesktop.systemd1.Manager",
"AttachProcessesToUnit",
&error, NULL,
"ssau",
NULL /* empty unit name means client's unit, i.e. us */, pp, 1, (uint32_t) pid);
/* NOTE(review): two consecutive return statements follow the same condition. The bare "return r" looks like
 * a removed line from the old function, with the logging return being the intended error path — confirm. */
if (r < 0)
return r;
return log_unit_debug_errno(u, r, "Failed to attach unit process " PID_FMT " via the bus: %s", pid, bus_error_message(&error, r));

return 0;
}

/* Moves every PID in 'pids' into the cgroup of unit 'u', or into the sub-cgroup named by 'suffix_path' below it
 * when that is non-empty.
 *
 * For each PID the main (SYSTEMD_CGROUP_CONTROLLER) hierarchy is tried first. If that fails with -EPERM or
 * -EACCES while running as a user manager, the request is forwarded via unit_attach_pid_to_cgroup_via_bus().
 * When cg_all_unified() reports a non-unified setup, the PID is additionally attached per supported
 * controller; failures there are only logged.
 *
 * Returns -EINVAL if the unit has no cgroup context, 0 if 'pids' is empty, the error of unit_realize_cgroup()
 * or cg_all_unified() if either fails, and otherwise the first error seen while attaching to the main
 * hierarchy (0 if there was none).
 *
 * NOTE(review): this text comes from a rendered diff without +/- markers; some lines below look like removed
 * lines of the previous implementation left interleaved. They are flagged individually and should be
 * confirmed against the upstream commit. */
int unit_attach_pids_to_cgroup(Unit *u, Set *pids, const char *suffix_path) {
CGroupMask delegated_mask;
const char *p;
Iterator i;
void *pidp;
int r, q;

assert(u);

if (!UNIT_HAS_CGROUP_CONTEXT(u))
return -EINVAL;

if (set_isempty(pids))
return 0;

/* NOTE(review): this call uses u->pids rather than the 'pids' parameter and its result is immediately
 * overwritten by the next statement; it looks like a removed line from the old implementation — confirm. */
r = cg_attach_many_everywhere(u->manager->cgroup_supported, u->cgroup_path, u->pids, migrate_callback, u);
r = unit_realize_cgroup(u);
if (r < 0)
return r;

/* NOTE(review): an unconditional return here would make everything below unreachable; it looks like a
 * removed line from the old implementation — confirm. */
return 0;
/* Target path: the unit's own cgroup, or "<cgroup>/<suffix_path>" when a suffix was given. */
if (isempty(suffix_path))
p = u->cgroup_path;
else
p = strjoina(u->cgroup_path, "/", suffix_path);

delegated_mask = unit_get_delegate_mask(u);

r = 0;
SET_FOREACH(pidp, pids, i) {
pid_t pid = PTR_TO_PID(pidp);
CGroupController c;

/* First, attach the PID to the main cgroup hierarchy */
q = cg_attach(SYSTEMD_CGROUP_CONTROLLER, p, pid);
if (q < 0) {
log_unit_debug_errno(u, q, "Couldn't move process " PID_FMT " to requested cgroup '%s': %m", pid, p);

if (MANAGER_IS_USER(u->manager) && IN_SET(q, -EPERM, -EACCES)) {
int z;

/* If we are in a user instance, and we can't move the process ourselves due to
 * permission problems, let's ask the system instance about it instead. Since it's more
 * privileged it might be able to move the process across the leaves of a subtree whose
 * top node is not owned by us. */

z = unit_attach_pid_to_cgroup_via_bus(u, pid, suffix_path);
if (z < 0)
log_unit_debug_errno(u, z, "Couldn't move process " PID_FMT " to requested cgroup '%s' via the system bus either: %m", pid, p);
else
continue; /* When the bus thing worked via the bus we are fully done for this PID. */
}

if (r >= 0)
r = q; /* Remember first error */

/* The per-controller handling below is skipped for a PID that could not be attached to the main hierarchy. */
continue;
}

/* A positive result means everything is on the unified hierarchy, so there are no per-controller
 * hierarchies to handle for this PID. */
q = cg_all_unified();
if (q < 0)
return q;
if (q > 0)
continue;

/* In the legacy hierarchy, attach the process to the requested cgroup if possible, and if not to the
 * innermost realized one */

for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
const char *realized;

if (!(u->manager->cgroup_supported & bit))
continue;

/* If this controller is delegated and realized, honour the caller's request for the cgroup suffix. */
if (delegated_mask & u->cgroup_realized_mask & bit) {
q = cg_attach(cgroup_controller_to_string(c), p, pid);
if (q >= 0)
continue; /* Success! */

log_unit_debug_errno(u, q, "Failed to attach PID " PID_FMT " to requested cgroup %s in controller %s, falling back to unit's cgroup: %m",
pid, p, cgroup_controller_to_string(c));
}

/* So this controller is either not delegated or not realized, or something else weird happened. In
 * that case let's attach the PID at least to the closest cgroup up the tree that is
 * realized. */
realized = unit_get_realized_cgroup_path(u, bit);
if (!realized)
continue; /* Not even realized in the root slice? Then let's not bother */

/* Failures here are logged only; they do not affect the return value. */
q = cg_attach(cgroup_controller_to_string(c), realized, pid);
if (q < 0)
log_unit_debug_errno(u, q, "Failed to attach PID " PID_FMT " to realized cgroup %s in controller %s, ignoring: %m",
pid, realized, cgroup_controller_to_string(c));
}
}

return r;
}

static void cgroup_xattr_apply(Unit *u) {
Expand Down
3 changes: 2 additions & 1 deletion src/core/cgroup.h
Expand Up @@ -167,6 +167,7 @@ bool unit_get_needs_bpf(Unit *u);

void unit_update_cgroup_members_masks(Unit *u);

const char *unit_get_realized_cgroup_path(Unit *u, CGroupMask mask);
char *unit_default_cgroup_path(Unit *u);
int unit_set_cgroup_path(Unit *u, const char *path);
int unit_pick_cgroup_path(Unit *u);
Expand All @@ -178,7 +179,7 @@ int unit_watch_cgroup(Unit *u);

void unit_add_to_cgroup_empty_queue(Unit *u);

int unit_attach_pids_to_cgroup(Unit *u);
int unit_attach_pids_to_cgroup(Unit *u, Set *pids, const char *suffix_path);

int manager_setup_cgroup(Manager *m);
void manager_shutdown_cgroup(Manager *m, bool delete);
Expand Down
21 changes: 21 additions & 0 deletions src/core/dbus-manager.c
Expand Up @@ -863,6 +863,26 @@ static int method_get_unit_processes(sd_bus_message *message, void *userdata, sd
return bus_unit_method_get_processes(message, u, error);
}

/* Manager-level bus handler for AttachProcessesToUnit.
 *
 * Consumes only the leading unit-name string from the message, resolves it to a unit with
 * bus_get_unit_by_name(), and then hands the same message to bus_unit_method_attach_processes(), whose
 * result becomes the handler's result. A negative value from either of the first two steps is returned
 * unchanged. */
static int method_attach_processes_to_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
        Manager *manager = userdata;
        const char *unit_name;
        Unit *unit;
        int ret;

        assert(message);
        assert(manager);

        ret = sd_bus_message_read(message, "s", &unit_name);
        if (ret < 0)
                return ret;

        ret = bus_get_unit_by_name(manager, message, unit_name, &unit, error);
        if (ret < 0)
                return ret;

        /* The remaining message arguments are left for the per-unit implementation. */
        return bus_unit_method_attach_processes(message, unit, error);
}

static int transient_unit_from_message(
Manager *m,
sd_bus_message *message,
Expand Down Expand Up @@ -2504,6 +2524,7 @@ const sd_bus_vtable bus_manager_vtable[] = {
SD_BUS_METHOD("UnrefUnit", "s", NULL, method_unref_unit, SD_BUS_VTABLE_UNPRIVILEGED),
SD_BUS_METHOD("StartTransientUnit", "ssa(sv)a(sa(sv))", "o", method_start_transient_unit, SD_BUS_VTABLE_UNPRIVILEGED),
SD_BUS_METHOD("GetUnitProcesses", "s", "a(sus)", method_get_unit_processes, SD_BUS_VTABLE_UNPRIVILEGED),
SD_BUS_METHOD("AttachProcessesToUnit", "ssau", NULL, method_attach_processes_to_unit, SD_BUS_VTABLE_UNPRIVILEGED),
SD_BUS_METHOD("GetJob", "u", "o", method_get_job, SD_BUS_VTABLE_UNPRIVILEGED),
SD_BUS_METHOD("GetJobAfter", "u", "a(usssoo)", method_get_job_waiting, SD_BUS_VTABLE_UNPRIVILEGED),
SD_BUS_METHOD("GetJobBefore", "u", "a(usssoo)", method_get_job_waiting, SD_BUS_VTABLE_UNPRIVILEGED),
Expand Down
32 changes: 26 additions & 6 deletions src/core/dbus-scope.c
Expand Up @@ -89,17 +89,39 @@ static int bus_scope_set_transient_property(
return bus_set_transient_usec(UNIT(s), name, &s->timeout_stop_usec, message, flags, error);

if (streq(name, "PIDs")) {
_cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
unsigned n = 0;
uint32_t pid;

r = sd_bus_message_enter_container(message, 'a', "u");
if (r < 0)
return r;

while ((r = sd_bus_message_read(message, "u", &pid)) > 0) {
for (;;) {
uint32_t upid;
pid_t pid;

if (pid <= 1)
return -EINVAL;
r = sd_bus_message_read(message, "u", &upid);
if (r < 0)
return r;
if (r == 0)
break;

if (upid == 0) {
if (!creds) {
r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_PID, &creds);
if (r < 0)
return r;
}

r = sd_bus_creds_get_pid(creds, &pid);
if (r < 0)
return r;
} else
pid = (uid_t) upid;

r = unit_pid_attachable(UNIT(s), pid, error);
if (r < 0)
return r;

if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
r = unit_watch_pid(UNIT(s), pid);
Expand All @@ -109,8 +131,6 @@ static int bus_scope_set_transient_property(

n++;
}
if (r < 0)
return r;

r = sd_bus_message_exit_container(message);
if (r < 0)
Expand Down

0 comments on commit dcf5e00

Please sign in to comment.