Skip to content

Commit

Permalink
pid1: add ProtectSystem= as system-wide configuration, and default it…
Browse files Browse the repository at this point in the history
… to true in the initrd

This adds a new ProtectSystem= setting that mirrors the option of the
same name for services, but in a more restrictive way. If enabled, it will
remount /usr/ read-only, very early at boot. It takes a special value "auto"
(which is the default) which is equivalent to true in the initrd, and
false otherwise.

Unlike the per-service option we don't support full/strict modes, but
the door is open to eventually support that too if it makes sense. It's
not entirely trivial though as we have very little mounted this early,
and hence the mechanism might not apply 1:1. Hence this PR is a
conservative first step.

My primary goal with this is to lock down initrds a bit, since they
conceptually are mostly immutable, but they are unpacked into a mutable
tmpfs. Let's tighten the screws a bit on that, and at least make /usr/
immutable.

This is particularly nice on USIs (i.e. Unified System Images, that pack
a whole OS into a UKI without transitioning out of it), such as
diskomator.
  • Loading branch information
poettering committed Dec 6, 2023
1 parent 8e3dc73 commit ffc1ec7
Show file tree
Hide file tree
Showing 3 changed files with 89 additions and 1 deletion.
14 changes: 14 additions & 0 deletions man/systemd-system.conf.xml
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,20 @@
<xi:include href="version-info.xml" xpointer="v239"/></listitem>
</varlistentry>

<varlistentry>
<term><varname>ProtectSystem=</varname></term>

<listitem><para>Takes a boolean argument or the string <literal>auto</literal>. If set to true this
will remount <filename>/usr/</filename> read-only. If set to <literal>auto</literal> (the default),
this is equivalent to true when running in an initrd, and to false otherwise. This implements a
restricted subset of the per-unit setting of the same name, see
<citerefentry><refentrytitle>systemd.exec</refentrytitle><manvolnum>5</manvolnum></citerefentry> for
details: currently, the <literal>full</literal> and <literal>strict</literal> values are not
supported.</para>

<xi:include href="version-info.xml" xpointer="v256"/></listitem>
</varlistentry>

<varlistentry>
<term><varname>SystemCallArchitectures=</varname></term>

Expand Down
75 changes: 74 additions & 1 deletion src/core/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@
#include "manager-serialize.h"
#include "mkdir-label.h"
#include "mount-setup.h"
#include "mount-util.h"
#include "os-util.h"
#include "pager.h"
#include "parse-argument.h"
Expand Down Expand Up @@ -140,6 +141,7 @@ static char **arg_default_environment;
static char **arg_manager_environment;
static uint64_t arg_capability_bounding_set;
static bool arg_no_new_privs;
static int arg_protect_system;
static nsec_t arg_timer_slack_nsec;
static Set* arg_syscall_archs;
static FILE* arg_serialization;
Expand Down Expand Up @@ -610,6 +612,43 @@ static int config_parse_oom_score_adjust(
return 0;
}

static int config_parse_protect_system_pid1(
                const char *unit,
                const char *filename,
                unsigned line,
                const char *section,
                unsigned section_line,
                const char *lvalue,
                int ltype,
                const char *rvalue,
                void *data,
                void *userdata) {

        int *protect = ASSERT_PTR(data);
        int b;

        /* Config parser for the manager-wide ProtectSystem= setting. The destination integer (passed via
         * 'data') is tri-state:
         *
         *     -1 → "auto" (also used when the value is empty)
         *      0 → boolean false
         *     >0 → boolean true
         *
         * Compared to the per-service ProtectSystem= this is narrower in one respect (only booleans are
         * accepted, neither "strict" nor "full") and more automatic in another (the "auto" value, which
         * turns protection on in the initrd unless configured otherwise).
         *
         * Getting closer to the per-service semantics later is conceivable, but not trivial because of
         * ordering constraints: at the point this logic is applied not much besides /usr/ is mounted.
         *
         * Always returns 0: an unparsable value is logged and ignored, leaving the previous setting in
         * place. */

        if (!isempty(rvalue) && !streq(rvalue, "auto")) {
                b = parse_boolean(rvalue);
                if (b < 0) {
                        /* Not a boolean: warn, and keep whatever was configured before. */
                        log_syntax(unit, LOG_WARNING, filename, line, b, "Failed to parse ProtectSystem= argument '%s', ignoring: %m", rvalue);
                        return 0;
                }

                *protect = b;
                return 0;
        }

        /* Empty assignment or explicit "auto" */
        *protect = -1;
        return 0;
}

static int parse_config_file(void) {
const ConfigTableItem items[] = {
{ "Manager", "LogLevel", config_parse_level2, 0, NULL },
Expand Down Expand Up @@ -637,6 +676,7 @@ static int parse_config_file(void) {
{ "Manager", "RuntimeWatchdogPreGovernor", config_parse_string, CONFIG_PARSE_STRING_SAFE, &arg_watchdog_pretimeout_governor },
{ "Manager", "CapabilityBoundingSet", config_parse_capability_set, 0, &arg_capability_bounding_set },
{ "Manager", "NoNewPrivileges", config_parse_bool, 0, &arg_no_new_privs },
{ "Manager", "ProtectSystem", config_parse_protect_system_pid1, 0, &arg_protect_system },
#if HAVE_SECCOMP
{ "Manager", "SystemCallArchitectures", config_parse_syscall_archs, 0, &arg_syscall_archs },
#else
Expand Down Expand Up @@ -1684,6 +1724,35 @@ static void initialize_core_pattern(bool skip_setup) {
arg_early_core_pattern);
}

static void apply_protect_system(bool skip_setup) {
        int r;

        /* Applies the manager-wide ProtectSystem= setting by turning /usr/ into a read-only mount.
         *
         * Nothing is done if setup is to be skipped, if we are not PID 1, or if the setting is off (0).
         * If the setting is "auto" (negative) we only go ahead when running in an initrd. Any failure
         * along the way is non-fatal: we log and return. */

        if (skip_setup)
                return;

        if (getpid_cached() != 1)
                return;

        if (arg_protect_system == 0)
                return;

        if (arg_protect_system < 0 && !in_initrd()) {
                log_debug("ProtectSystem=auto selected, but not running in an initrd, skipping.");
                return;
        }

        /* A bind remount operates on a mount point, hence make sure /usr is one first. */
        r = make_mount_point("/usr");
        if (r < 0) {
                log_warning_errno(r, "Failed to make /usr/ a mount point, ignoring: %m");
                return;
        }

        /* Flip the mount to read-only. The helper is handed LOG_WARNING as its log level, hence we
         * don't log about failures ourselves here. */
        r = mount_nofollow_verbose(
                        LOG_WARNING,
                        /* what= */ NULL,
                        "/usr",
                        /* fstype= */ NULL,
                        MS_BIND|MS_REMOUNT|MS_RDONLY,
                        /* options= */ NULL);
        if (r < 0)
                return;

        log_info("Successfully made /usr/ read-only.");
}

static void update_cpu_affinity(bool skip_setup) {
_cleanup_free_ char *mask = NULL;

Expand Down Expand Up @@ -2531,6 +2600,7 @@ static void reset_arguments(void) {

arg_capability_bounding_set = CAP_MASK_UNSET;
arg_no_new_privs = false;
arg_protect_system = -1;
arg_timer_slack_nsec = NSEC_INFINITY;

arg_syscall_archs = set_free(arg_syscall_archs);
Expand Down Expand Up @@ -3040,9 +3110,12 @@ int main(int argc, char *argv[]) {
cmdline_take_random_seed();
}

/* A core pattern might have been specified via the cmdline. */
/* A core pattern might have been specified via the cmdline. */
initialize_core_pattern(skip_setup);

/* Make /usr/ read-only */
apply_protect_system(skip_setup);

/* Close logging fds, in order not to confuse collecting passed fds and terminal logic below */
log_close();

Expand Down
1 change: 1 addition & 0 deletions src/core/system.conf.in
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
#WatchdogDevice=
#CapabilityBoundingSet=
#NoNewPrivileges=no
#ProtectSystem=auto
#SystemCallArchitectures=
#TimerSlackNSec=
#StatusUnitFormat={{STATUS_UNIT_FORMAT_DEFAULT_STR}}
Expand Down

0 comments on commit ffc1ec7

Please sign in to comment.