Skip to content

Commit

Permalink
core: add new bus call for migrating foreign processes to scope/s…
Browse files Browse the repository at this point in the history
…ervice units

This adds a new bus call to service and scope units called
AttachProcesses() that moves arbitrary processes into the cgroup of the
unit. The primary user for this new API is systemd itself: the systemd
--user instance invokes this call on the systemd --system instance to
migrate processes if it gets a request to migrate processes itself and
the kernel refuses this due to access restrictions.

The primary use-case of this is to make "systemd-run --scope --user …"
invoked from user session scopes work correctly on pure cgroupsv2
environments. There, the kernel refuses to migrate processes between two
unprivileged-owned cgroups unless the requestor as well as the ownership
of the closest parent cgroup all match. This however is not the case
between the session-XYZ.scope unit of a login session and the
user@ABC.service of the systemd --user instance.

The new logic always tries to move the processes on its own, but if
that doesn't work when being the user manager, then the system manager
is asked to do it instead.

The new operation is relatively restrictive: it only allows moving
processes like this if the caller is root, or if the UIDs of the target
unit, the caller and the process all match. Note that this means that
unprivileged users cannot attach processes to scope units, as those do
not have "owning" users (i.e. they have no User= field).

Fixes: systemd#3388
  • Loading branch information
poettering committed Feb 9, 2018
1 parent fd7f21b commit 7c510fe
Show file tree
Hide file tree
Showing 10 changed files with 347 additions and 17 deletions.
146 changes: 137 additions & 9 deletions src/core/cgroup.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include "alloc-util.h"
#include "blockdev-util.h"
#include "bpf-firewall.h"
#include "bus-error.h"
#include "cgroup-util.h"
#include "cgroup.h"
#include "fd-util.h"
Expand Down Expand Up @@ -1303,13 +1304,12 @@ void unit_update_cgroup_members_masks(Unit *u) {
}
}

static const char *migrate_callback(CGroupMask mask, void *userdata) {
Unit *u = userdata;
const char *unit_get_realized_cgroup_path(Unit *u, CGroupMask mask) {

assert(mask != 0);
assert(u);
/* Returns the realized cgroup path of the specified unit where all specified controllers are available. */

while (u) {

if (u->cgroup_path &&
u->cgroup_realized &&
(u->cgroup_realized_mask & mask) == mask)
Expand All @@ -1321,6 +1321,10 @@ static const char *migrate_callback(CGroupMask mask, void *userdata) {
return NULL;
}

/* Callback adapter: treats the opaque userdata pointer as the Unit and forwards to
 * unit_get_realized_cgroup_path() with the requested controller mask. */
static const char *migrate_callback(CGroupMask mask, void *userdata) {
        Unit *unit = userdata;

        return unit_get_realized_cgroup_path(unit, mask);
}

char *unit_default_cgroup_path(Unit *u) {
_cleanup_free_ char *escaped = NULL, *slice = NULL;
int r;
Expand Down Expand Up @@ -1503,19 +1507,143 @@ static int unit_create_cgroup(
return 0;
}

int unit_attach_pids_to_cgroup(Unit *u) {
/* Asks the system service manager, over the system bus, to move a single process into this unit's cgroup, or
 * into the sub-cgroup 'suffix_path' below it when suffix_path is non-empty.
 *
 * Returns 0 on success. Fails with -EINVAL if we are the system manager ourselves or if the unit has no cgroup
 * path below our cgroup root, with -EIO if there is no system bus connection, and with the (logged) failure of
 * the bus call otherwise. */
static int unit_attach_pid_to_cgroup_via_bus(Unit *u, pid_t pid, const char *suffix_path) {
        _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
        char *pp;
        int r;

        assert(u);

        /* Delegating to the system manager only makes sense when we are not the system manager. */
        if (MANAGER_IS_SYSTEM(u->manager))
                return -EINVAL;

        if (!u->manager->system_bus)
                return -EIO;

        if (!u->cgroup_path)
                return -EINVAL;

        /* Determine this unit's cgroup path relative to our cgroup root */
        pp = path_startswith(u->cgroup_path, u->manager->cgroup_root);
        if (!pp)
                return -EINVAL;

        /* Build a private, writable copy of the relative path so that it can be normalized in place below. */
        if (!isempty(suffix_path))
                pp = strjoina("/", pp, suffix_path);
        else
                pp = strjoina("/", pp);

        path_kill_slashes(pp);

        r = sd_bus_call_method(u->manager->system_bus,
                               "org.freedesktop.systemd1",
                               "/org/freedesktop/systemd1",
                               "org.freedesktop.systemd1.Manager",
                               "AttachProcessesToUnit",
                               &error, NULL,
                               "ssau",
                               NULL /* empty unit name means client's unit, i.e. us */, pp, 1, (uint32_t) pid);
        if (r < 0)
                return log_unit_debug_errno(u, r, "Failed to attach unit process via the bus: %s", bus_error_message(&error, r));

        return 0;
}

/* Moves every PID in 'pids' into the cgroup of unit 'u', or into the sub-cgroup 'suffix_path' below it when
 * suffix_path is non-empty. The unit's cgroup is realized first.
 *
 * Each process is first attached in the main (systemd) hierarchy. If that is refused with -EPERM/-EACCES and
 * we are a user manager, the system manager is asked to do the move via the bus instead. On non-unified
 * setups the process is additionally attached in every supported controller hierarchy, on a best-effort
 * basis.
 *
 * Returns 0 if 'pids' is empty or every process could be moved in the main hierarchy, -EINVAL if the unit has
 * no cgroup context, a negative error from unit_realize_cgroup() or cg_all_unified(), and otherwise the first
 * error seen while moving a process in the main hierarchy. Failures in the per-controller hierarchies are
 * only logged. */
int unit_attach_pids_to_cgroup(Unit *u, Set *pids, const char *suffix_path) {
        CGroupMask delegated_mask;
        const char *p;
        Iterator i;
        void *pidp;
        int r, q;

        assert(u);

        if (!UNIT_HAS_CGROUP_CONTEXT(u))
                return -EINVAL;

        if (set_isempty(pids))
                return 0;

        r = unit_realize_cgroup(u);
        if (r < 0)
                return r;

        if (isempty(suffix_path))
                p = u->cgroup_path;
        else
                p = strjoina(u->cgroup_path, "/", suffix_path);

        delegated_mask = unit_get_delegate_mask(u);

        r = 0;
        SET_FOREACH(pidp, pids, i) {
                pid_t pid = PTR_TO_PID(pidp);
                CGroupController c;

                /* First, attach the PID to the main cgroup hierarchy */
                q = cg_attach(SYSTEMD_CGROUP_CONTROLLER, p, pid);
                if (IN_SET(q, -EPERM, -EACCES) && MANAGER_IS_USER(u->manager)) {
                        int z;

                        /* If we are in a user instance, and we can't move the process ourselves, let's ask the
                         * system instance about it instead. Since it's more privileged it might be able to move
                         * the process across the leaves of a subtree whose top node is not owned by us. */

                        z = unit_attach_pid_to_cgroup_via_bus(u, pid, suffix_path);
                        if (z >= 0)
                                continue; /* The system manager moved the process, we are done with this PID. */

                        log_unit_debug_errno(u, z, "Going via the bus didn't work either, ignoring: %m");

                        /* 'q' still holds the original error (-EPERM or -EACCES), which is what gets
                         * propagated below. */
                }
                if (q < 0) {
                        log_unit_debug_errno(u, q, "Couldn't move process " PID_FMT " to requested cgroup %s: %m", pid, p);
                        if (r == 0)
                                r = q; /* Remember first error */
                        continue;
                }

                q = cg_all_unified();
                if (q < 0)
                        return q;
                if (q > 0)
                        continue;

                /* In the legacy hierarchy, attach the process to the requested cgroup if possible, and if not
                 * to the innermost realized one */

                for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
                        CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
                        const char *realized;

                        if (!(u->manager->cgroup_supported & bit))
                                continue;

                        /* If this controller is delegated and realized, honour the caller's request for the cgroup suffix. */
                        if (delegated_mask & u->cgroup_realized_mask & bit) {
                                q = cg_attach(cgroup_controller_to_string(c), p, pid);
                                if (q >= 0)
                                        continue; /* Success! */

                                log_unit_debug_errno(u, q, "Failed to attach PID " PID_FMT " to requested cgroup %s in controller %s, falling back to unit's cgroup: %m",
                                                     pid, p, cgroup_controller_to_string(c));
                        }

                        /* So this controller is either not delegated or not realized, or something else weird
                         * happened. In that case let's attach the PID at least to the closest cgroup up the
                         * tree that is realized. */
                        realized = unit_get_realized_cgroup_path(u, bit);
                        if (!realized)
                                continue; /* Not even realized in the root slice? Then let's not bother */

                        q = cg_attach(cgroup_controller_to_string(c), realized, pid);
                        if (q < 0)
                                log_unit_debug_errno(u, q, "Failed to attach PID " PID_FMT " to realized cgroup %s in controller %s, ignoring: %m",
                                                     pid, realized, cgroup_controller_to_string(c));
                }
        }

        return r;
}

static void cgroup_xattr_apply(Unit *u) {
Expand Down
3 changes: 2 additions & 1 deletion src/core/cgroup.h
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,7 @@ bool unit_get_needs_bpf(Unit *u);

void unit_update_cgroup_members_masks(Unit *u);

const char *unit_get_realized_cgroup_path(Unit *u, CGroupMask mask);
char *unit_default_cgroup_path(Unit *u);
int unit_set_cgroup_path(Unit *u, const char *path);
int unit_pick_cgroup_path(Unit *u);
Expand All @@ -178,7 +179,7 @@ int unit_watch_cgroup(Unit *u);

void unit_add_to_cgroup_empty_queue(Unit *u);

int unit_attach_pids_to_cgroup(Unit *u);
int unit_attach_pids_to_cgroup(Unit *u, Set *pids, const char *suffix_path);

int manager_setup_cgroup(Manager *m);
void manager_shutdown_cgroup(Manager *m, bool delete);
Expand Down
21 changes: 21 additions & 0 deletions src/core/dbus-manager.c
Original file line number Diff line number Diff line change
Expand Up @@ -863,6 +863,26 @@ static int method_get_unit_processes(sd_bus_message *message, void *userdata, sd
return bus_unit_method_get_processes(message, u, error);
}

/* Bus method handler registered as "AttachProcessesToUnit": reads the leading unit name from the message,
 * resolves it to a unit via bus_get_unit_by_name() and hands the message on to
 * bus_unit_method_attach_processes(). Any failure of the read or the lookup is returned as-is.
 * NOTE(review): the remaining "sau" arguments are presumably consumed by bus_unit_method_attach_processes()
 * — confirm against its implementation. */
static int method_attach_processes_to_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
        Manager *manager = userdata;
        const char *unit_name;
        Unit *unit;
        int ret;

        assert(message);
        assert(manager);

        /* Only look the unit up if the name could actually be read. */
        ret = sd_bus_message_read(message, "s", &unit_name);
        if (ret >= 0)
                ret = bus_get_unit_by_name(manager, message, unit_name, &unit, error);
        if (ret < 0)
                return ret;

        return bus_unit_method_attach_processes(message, unit, error);
}

static int transient_unit_from_message(
Manager *m,
sd_bus_message *message,
Expand Down Expand Up @@ -2504,6 +2524,7 @@ const sd_bus_vtable bus_manager_vtable[] = {
SD_BUS_METHOD("UnrefUnit", "s", NULL, method_unref_unit, SD_BUS_VTABLE_UNPRIVILEGED),
SD_BUS_METHOD("StartTransientUnit", "ssa(sv)a(sa(sv))", "o", method_start_transient_unit, SD_BUS_VTABLE_UNPRIVILEGED),
SD_BUS_METHOD("GetUnitProcesses", "s", "a(sus)", method_get_unit_processes, SD_BUS_VTABLE_UNPRIVILEGED),
SD_BUS_METHOD("AttachProcessesToUnit", "ssau", NULL, method_attach_processes_to_unit, SD_BUS_VTABLE_UNPRIVILEGED),
SD_BUS_METHOD("GetJob", "u", "o", method_get_job, SD_BUS_VTABLE_UNPRIVILEGED),
SD_BUS_METHOD("GetJobAfter", "u", "a(usssoo)", method_get_job_waiting, SD_BUS_VTABLE_UNPRIVILEGED),
SD_BUS_METHOD("GetJobBefore", "u", "a(usssoo)", method_get_job_waiting, SD_BUS_VTABLE_UNPRIVILEGED),
Expand Down
32 changes: 26 additions & 6 deletions src/core/dbus-scope.c
Original file line number Diff line number Diff line change
Expand Up @@ -89,17 +89,39 @@ static int bus_scope_set_transient_property(
return bus_set_transient_usec(UNIT(s), name, &s->timeout_stop_usec, message, flags, error);

if (streq(name, "PIDs")) {
_cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
unsigned n = 0;
uint32_t pid;

r = sd_bus_message_enter_container(message, 'a', "u");
if (r < 0)
return r;

while ((r = sd_bus_message_read(message, "u", &pid)) > 0) {
for (;;) {
uint32_t upid;
pid_t pid;

if (pid <= 1)
return -EINVAL;
r = sd_bus_message_read(message, "u", &upid);
if (r < 0)
return r;
if (r == 0)
break;

if (upid == 0) {
if (!creds) {
r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_PID, &creds);
if (r < 0)
return r;
}

r = sd_bus_creds_get_pid(creds, &pid);
if (r < 0)
return r;
} else
pid = (uid_t) upid;

r = unit_pid_attachable(UNIT(s), pid, error);
if (r < 0)
return r;

if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
r = unit_watch_pid(UNIT(s), pid);
Expand All @@ -109,8 +131,6 @@ static int bus_scope_set_transient_property(

n++;
}
if (r < 0)
return r;

r = sd_bus_message_exit_container(message);
if (r < 0)
Expand Down
Loading

0 comments on commit 7c510fe

Please sign in to comment.