Skip to content

Commit

Permalink
softreboot: add survivors.slice for surviving services
Browse files Browse the repository at this point in the history
Add a new special top-level slice, survivors.slice. Units nested under
this slice will not be stopped/restarted during a soft reboot
operation. By defining this interface, we make the promise to users
that we will do what's necessary to ensure the functionality is
provided as expected, regardless of what changes under the hood.

Implement the killing spree on soft reboot at the cgroup level,
iterating over the remaining top-level cgroups and sending SIGTERM and
SIGKILL to all apart from init.scope and survivors.slice. For this
reason, this is supported only with cgroupsv2, but we intend to
deprecate cgroupsv1 anyway.

At the moment this interface implies the following:

- not being sent SIGTERM/SIGKILL on final shutdown
- no conflict/ordering with shutdown/softreboot targets
- conflict/ordering with poweroff/halt/reboot/kexec targets
- ignored on isolate when booting/shutting down the manager
  • Loading branch information
bluca committed Jul 28, 2023
1 parent afe7026 commit f8a1a49
Show file tree
Hide file tree
Showing 19 changed files with 250 additions and 73 deletions.
5 changes: 5 additions & 0 deletions man/systemd-soft-reboot.service.xml
Expand Up @@ -122,6 +122,11 @@
url="https://systemd.io/PORTABLE_SERVICES">Portable Services</ulink>, but make sure no resource from
the host's root filesystem is pinned via <varname>BindPaths=</varname> or similar unit settings,
otherwise the old root filesystem will be kept in memory as long as the unit is running.</para>

<para>If units shall be left running until the very end of shutdown during a soft reboot operation, but
shall be terminated regularly during other forms of shutdown, they need to be nested under
<varname>survivors.slice</varname>. This will ensure that soft reboot operations do not affect them, but
other types of reboot or shutdown stop them as expected.</para>
</refsect1>

<refsect1>
Expand Down
11 changes: 11 additions & 0 deletions man/systemd.special.xml
Expand Up @@ -1231,6 +1231,17 @@
<filename>systemd-machined.service</filename>.</para>
</listitem>
</varlistentry>

<varlistentry>
<term><filename>survivors.slice</filename></term>
<listitem>
<para>Units that are part of this slice (directly or nested) will not be stopped or restarted
during a soft reboot operation (see:
<citerefentry><refentrytitle>systemd-soft-reboot.service</refentrytitle><manvolnum>8</manvolnum></citerefentry>),
but will instead keep running uninterrupted. Note that this is only supported when booting with
the unified cgroup hierarchy.</para>
</listitem>
</varlistentry>
</variablelist>
</refsect2>
</refsect1>
Expand Down
58 changes: 58 additions & 0 deletions src/basic/cgroup-util.c
Expand Up @@ -471,6 +471,64 @@ int cg_kill_recursive(
return ret;
}

int cg_kill_all(void) {
_cleanup_closedir_ DIR *d = NULL;
char *cg_root;
int r;

if (cg_all_unified() <= 0)
return -EOPNOTSUPP;

r = cg_get_root_path(&cg_root);
if (r < 0)
return r;

r = cg_enumerate_subgroups(SYSTEMD_CGROUP_CONTROLLER, cg_root, &d);
if (r < 0)
return r;

for (;;) {
_cleanup_free_ char *node = NULL, *path = NULL;

r = cg_read_subgroup(d, &node);
if (r < 0)
return r;
if (r == 0)
break;

if (STR_IN_SET(node, SPECIAL_INIT_SCOPE, SPECIAL_SOFT_REBOOT_SURVIVORS_SLICE))
continue;

path = path_join(empty_to_root(cg_root), node);
if (!path)
return -ENOMEM;

log_debug("Killing cgroup %s", path);

r = cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER,
path,
SIGTERM,
/* flags= */ 0,
/* s= */ NULL,
/* log_kill= */ NULL,
/* userdata= */ NULL);
if (r < 0)
log_error_errno(r, "Failed to send SIGTERM to cgroup %s, ignoring: %m", path);

r = cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER,
path,
SIGKILL,
/* flags= */ 0,
/* s= */ NULL,
/* log_kill= */ NULL,
/* userdata= */ NULL);
if (r < 0)
log_error_errno(r, "Failed to send SIGKILL to cgroup %s, ignoring: %m", path);
}

return 0;
}

static const char *controller_to_dirname(const char *controller) {
assert(controller);

Expand Down
2 changes: 2 additions & 0 deletions src/basic/cgroup-util.h
Expand Up @@ -195,6 +195,8 @@ typedef int (*cg_kill_log_func_t)(pid_t pid, int sig, void *userdata);
int cg_kill(const char *controller, const char *path, int sig, CGroupFlags flags, Set *s, cg_kill_log_func_t kill_log, void *userdata);
int cg_kill_kernel_sigkill(const char *controller, const char *path);
int cg_kill_recursive(const char *controller, const char *path, int sig, CGroupFlags flags, Set *s, cg_kill_log_func_t kill_log, void *userdata);
/* Send SIGTERM + SIGKILL to all cgroups apart from init.scope and survivors.slice */
int cg_kill_all(void);

int cg_split_spec(const char *spec, char **ret_controller, char **ret_path);
int cg_mangle_path(const char *path, char **result);
Expand Down
1 change: 1 addition & 0 deletions src/basic/special.h
Expand Up @@ -111,6 +111,7 @@
#define SPECIAL_USER_SLICE "user.slice"
#define SPECIAL_MACHINE_SLICE "machine.slice"
#define SPECIAL_ROOT_SLICE "-.slice"
#define SPECIAL_SOFT_REBOOT_SURVIVORS_SLICE "survivors.slice"

/* The scope unit systemd itself lives in. */
#define SPECIAL_INIT_SCOPE "init.scope"
Expand Down
13 changes: 9 additions & 4 deletions src/core/main.c
Expand Up @@ -1869,11 +1869,16 @@ static int do_reexecute(

/* Kill all remaining processes from the initrd, but don't wait for them, so that we can handle the
* SIGCHLD for them after deserializing. */
if (IN_SET(objective, MANAGER_SWITCH_ROOT, MANAGER_SOFT_REBOOT))
if (objective == MANAGER_SWITCH_ROOT)
broadcast_signal(SIGTERM, /* wait_for_exit= */ false, /* send_sighup= */ true, arg_default_timeout_stop_usec);
/* On soft reboot really make sure nothing is left */
if (objective == MANAGER_SOFT_REBOOT)
broadcast_signal(SIGKILL, /* wait_for_exit= */ false, /* send_sighup= */ false, arg_default_timeout_stop_usec);
else if (objective == MANAGER_SOFT_REBOOT) {
/* On soft reboot we want units in the survivors slice to not be interrupted,
* so instead of broadcasting SIGTERM to all running processes, kill all top-level cgroups
* apart from our own scope and that special slice. */
r = cg_kill_all();
if (r < 0)
log_error_errno(r, "Failed to kill top level cgroups, ignoring: %m");
}

if (!switch_root_dir && objective == MANAGER_SOFT_REBOOT) {
/* If no switch root dir is specified, then check if /run/nextroot/ qualifies and use that */
Expand Down
10 changes: 8 additions & 2 deletions src/core/path.c
Expand Up @@ -342,12 +342,18 @@ static int path_add_default_dependencies(Path *p) {
return r;

if (MANAGER_IS_SYSTEM(UNIT(p)->manager)) {
r = unit_add_two_dependencies_by_name(UNIT(p), UNIT_AFTER, UNIT_REQUIRES, SPECIAL_SYSINIT_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
r = unit_add_two_dependencies_by_name(
UNIT(p),
UNIT_AFTER,
unit_survives_soft_reboot(UNIT(p)) ? _UNIT_DEPENDENCY_INVALID : UNIT_REQUIRES,
SPECIAL_SYSINIT_TARGET,
/* add_reference= */ true,
UNIT_DEPENDENCY_DEFAULT);
if (r < 0)
return r;
}

return unit_add_two_dependencies_by_name(UNIT(p), UNIT_BEFORE, UNIT_CONFLICTS, SPECIAL_SHUTDOWN_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
return unit_add_dependencies_on_real_shutdown_targets(UNIT(p));
}

static int path_add_trigger_dependencies(Path *p) {
Expand Down
15 changes: 3 additions & 12 deletions src/core/scope.c
Expand Up @@ -131,23 +131,14 @@ static void scope_set_state(Scope *s, ScopeState state) {
}

static int scope_add_default_dependencies(Scope *s) {
int r;

assert(s);

if (!UNIT(s)->default_dependencies)
return 0;

/* Make sure scopes are unloaded on shutdown */
r = unit_add_two_dependencies_by_name(
UNIT(s),
UNIT_BEFORE, UNIT_CONFLICTS,
SPECIAL_SHUTDOWN_TARGET, true,
UNIT_DEPENDENCY_DEFAULT);
if (r < 0)
return r;

return 0;
/* Make sure scopes are unloaded on shutdown, unless we are meant to survive soft reboot, in which
* case we need to conflict with non-soft-reboot targets. */
return unit_add_dependencies_on_real_shutdown_targets(UNIT(s));
}

static int scope_verify(Scope *s) {
Expand Down
21 changes: 14 additions & 7 deletions src/core/service.c
Expand Up @@ -722,11 +722,17 @@ static int service_add_default_dependencies(Service *s) {
* majority of services. */

if (MANAGER_IS_SYSTEM(UNIT(s)->manager)) {
/* First, pull in the really early boot stuff, and
* require it, so that we fail if we can't acquire
* it. */

r = unit_add_two_dependencies_by_name(UNIT(s), UNIT_AFTER, UNIT_REQUIRES, SPECIAL_SYSINIT_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
/* First, pull in the really early boot stuff, and require it, so that we fail if we can't
* acquire it. But only add ordering if this is meant to survive a soft reboot, otherwise
* it will be pulled down. */

r = unit_add_two_dependencies_by_name(
UNIT(s),
UNIT_AFTER,
unit_survives_soft_reboot(UNIT(s)) ? _UNIT_DEPENDENCY_INVALID : UNIT_REQUIRES,
SPECIAL_SYSINIT_TARGET,
/* add_reference= */ true,
UNIT_DEPENDENCY_DEFAULT);
if (r < 0)
return r;
} else {
Expand All @@ -746,8 +752,9 @@ static int service_add_default_dependencies(Service *s) {
if (r < 0)
return r;

/* Third, add us in for normal shutdown. */
return unit_add_two_dependencies_by_name(UNIT(s), UNIT_BEFORE, UNIT_CONFLICTS, SPECIAL_SHUTDOWN_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
/* Third, add us in for normal shutdown, unless we are meant to survive soft reboot, in which case
* we need to conflict with non-soft-reboot targets. */
return unit_add_dependencies_on_real_shutdown_targets(UNIT(s));
}

static void service_fix_stdio(Service *s) {
Expand Down
14 changes: 3 additions & 11 deletions src/core/slice.c
Expand Up @@ -64,22 +64,14 @@ static int slice_add_parent_slice(Slice *s) {
}

static int slice_add_default_dependencies(Slice *s) {
int r;

assert(s);

if (!UNIT(s)->default_dependencies)
return 0;

/* Make sure slices are unloaded on shutdown */
r = unit_add_two_dependencies_by_name(
UNIT(s),
UNIT_BEFORE, UNIT_CONFLICTS,
SPECIAL_SHUTDOWN_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
if (r < 0)
return r;

return 0;
/* Make sure slices are unloaded on shutdown, unless we are meant to survive soft reboot, in which
* case we need to conflict with non-soft-reboot targets. */
return unit_add_dependencies_on_real_shutdown_targets(UNIT(s));
}

static int slice_verify(Slice *s) {
Expand Down
10 changes: 8 additions & 2 deletions src/core/socket.c
Expand Up @@ -278,12 +278,18 @@ static int socket_add_default_dependencies(Socket *s) {
return r;

if (MANAGER_IS_SYSTEM(UNIT(s)->manager)) {
r = unit_add_two_dependencies_by_name(UNIT(s), UNIT_AFTER, UNIT_REQUIRES, SPECIAL_SYSINIT_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
r = unit_add_two_dependencies_by_name(
UNIT(s),
UNIT_AFTER,
unit_survives_soft_reboot(UNIT(s)) ? _UNIT_DEPENDENCY_INVALID : UNIT_REQUIRES,
SPECIAL_SYSINIT_TARGET,
/* add_reference= */ true,
UNIT_DEPENDENCY_DEFAULT);
if (r < 0)
return r;
}

return unit_add_two_dependencies_by_name(UNIT(s), UNIT_BEFORE, UNIT_CONFLICTS, SPECIAL_SHUTDOWN_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
return unit_add_dependencies_on_real_shutdown_targets(UNIT(s));
}

_pure_ static bool socket_has_exec(Socket *s) {
Expand Down
5 changes: 3 additions & 2 deletions src/core/target.c
Expand Up @@ -65,8 +65,9 @@ static int target_add_default_dependencies(Target *t) {
if (unit_has_name(UNIT(t), SPECIAL_SHUTDOWN_TARGET))
return 0;

/* Make sure targets are unloaded on shutdown */
return unit_add_two_dependencies_by_name(UNIT(t), UNIT_BEFORE, UNIT_CONFLICTS, SPECIAL_SHUTDOWN_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
/* Make sure targets are unloaded on shutdown, unless we are meant to survive soft reboot, in which
* case we need to conflict with non-soft-reboot targets. */
return unit_add_dependencies_on_real_shutdown_targets(UNIT(t));
}

static int target_load(Unit *u) {
Expand Down
10 changes: 8 additions & 2 deletions src/core/timer.c
Expand Up @@ -94,7 +94,13 @@ static int timer_add_default_dependencies(Timer *t) {
return r;

if (MANAGER_IS_SYSTEM(UNIT(t)->manager)) {
r = unit_add_two_dependencies_by_name(UNIT(t), UNIT_AFTER, UNIT_REQUIRES, SPECIAL_SYSINIT_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
r = unit_add_two_dependencies_by_name(
UNIT(t),
UNIT_AFTER,
unit_survives_soft_reboot(UNIT(t)) ? _UNIT_DEPENDENCY_INVALID : UNIT_REQUIRES,
SPECIAL_SYSINIT_TARGET,
/* add_reference= */ true,
UNIT_DEPENDENCY_DEFAULT);
if (r < 0)
return r;

Expand All @@ -112,7 +118,7 @@ static int timer_add_default_dependencies(Timer *t) {
}
}

return unit_add_two_dependencies_by_name(UNIT(t), UNIT_BEFORE, UNIT_CONFLICTS, SPECIAL_SHUTDOWN_TARGET, true, UNIT_DEPENDENCY_DEFAULT);
return unit_add_dependencies_on_real_shutdown_targets(UNIT(t));
}

static int timer_add_trigger_dependencies(Timer *t) {
Expand Down
12 changes: 11 additions & 1 deletion src/core/transaction.c
Expand Up @@ -628,7 +628,12 @@ static int transaction_apply(
HASHMAP_FOREACH(j, m->jobs) {
assert(j->installed);

if (j->unit->ignore_on_isolate)
if (j->unit->ignore_on_isolate ||
(IN_SET(manager_state(j->unit->manager),
MANAGER_INITIALIZING,
MANAGER_STARTING,
MANAGER_STOPPING) &&
unit_survives_soft_reboot(j->unit)))
continue;

if (hashmap_contains(tr->jobs, j->unit))
Expand Down Expand Up @@ -1159,6 +1164,11 @@ static bool shall_stop_on_isolate(Transaction *tr, Unit *u) {
if (u->ignore_on_isolate)
return false;

/* When isolating on soft reboot, don't stop surviving units */
if (IN_SET(manager_state(u->manager), MANAGER_INITIALIZING, MANAGER_STARTING, MANAGER_STOPPING) &&
unit_survives_soft_reboot(u))
return false;

/* Is there already something listed for this? */
if (hashmap_contains(tr->jobs, u))
return false;
Expand Down

0 comments on commit f8a1a49

Please sign in to comment.