Skip to content

Commit ffc1ec7

Browse files
committed
pid1: add ProtectSystem= as system-wide configuration, and default it to true in the initrd
This adds a new ProtectSystem= setting that mirrors the option of the same name for services, but in a more restrictive way. If enabled, it will remount /usr/ read-only, very early at boot. It takes a special value "auto" (which is the default), which is equivalent to true in the initrd, and false otherwise. Unlike the per-service option we don't support full/strict modes, but the door is open to eventually support that too if it makes sense. It's not entirely trivial though, as we have very little mounted this early, and hence the mechanism might not apply 1:1. Hence this PR is a conservative first step. My primary goal with this is to lock down initrds a bit, since they conceptually are mostly immutable, but they are unpacked into a mutable tmpfs. Let's tighten the screws a bit on that, and at least make /usr/ immutable. This is particularly nice on USIs (i.e. Unified System Images, that pack a whole OS into a UKI without transitioning out of it), such as diskomator.
1 parent 8e3dc73 commit ffc1ec7

File tree

3 files changed

+89
-1
lines changed

3 files changed

+89
-1
lines changed

man/systemd-system.conf.xml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,20 @@
289289
<xi:include href="version-info.xml" xpointer="v239"/></listitem>
290290
</varlistentry>
291291

292+
<varlistentry>
293+
<term><varname>ProtectSystem=</varname></term>
294+
295+
<listitem><para>Takes a boolean argument or the string <literal>auto</literal>. If set to true this
296+
will remount <filename>/usr/</filename> read-only. If set to <literal>auto</literal> (the default)
297+
and running in an initrd, this is equivalent to true, otherwise to false. This implements a restricted subset of
298+
the per-unit setting of the same name, see
299+
<citerefentry><refentrytitle>systemd.exec</refentrytitle><manvolnum>5</manvolnum></citerefentry> for
300+
details: currently, the <literal>full</literal> or <literal>strict</literal> values are not
301+
supported.</para>
302+
303+
<xi:include href="version-info.xml" xpointer="v256"/></listitem>
304+
</varlistentry>
305+
292306
<varlistentry>
293307
<term><varname>SystemCallArchitectures=</varname></term>
294308

src/core/main.c

Lines changed: 74 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@
6868
#include "manager-serialize.h"
6969
#include "mkdir-label.h"
7070
#include "mount-setup.h"
71+
#include "mount-util.h"
7172
#include "os-util.h"
7273
#include "pager.h"
7374
#include "parse-argument.h"
@@ -140,6 +141,7 @@ static char **arg_default_environment;
140141
static char **arg_manager_environment;
141142
static uint64_t arg_capability_bounding_set;
142143
static bool arg_no_new_privs;
144+
static int arg_protect_system;
143145
static nsec_t arg_timer_slack_nsec;
144146
static Set* arg_syscall_archs;
145147
static FILE* arg_serialization;
@@ -610,6 +612,43 @@ static int config_parse_oom_score_adjust(
610612
return 0;
611613
}
612614

615+
static int config_parse_protect_system_pid1(
616+
const char *unit,
617+
const char *filename,
618+
unsigned line,
619+
const char *section,
620+
unsigned section_line,
621+
const char *lvalue,
622+
int ltype,
623+
const char *rvalue,
624+
void *data,
625+
void *userdata) {
626+
627+
int *v = ASSERT_PTR(data), r;
628+
629+
/* This is modelled after the per-service ProtectSystem= setting, but a bit more restricted on one
630+
* hand, and more automatic in another. i.e. we currently only support yes/no (not "strict" or
631+
* "full"). And we will enable this automatically for the initrd unless configured otherwise.
632+
*
633+
* We might extend this later to match more closely what the per-service ProtectSystem= can do, but
634+
* this is not trivial, due to ordering constraints: besides /usr/ we don't really have much mounted
635+
* at the moment we enable this logic. */
636+
637+
if (isempty(rvalue) || streq(rvalue, "auto")) {
638+
*v = -1;
639+
return 0;
640+
}
641+
642+
r = parse_boolean(rvalue);
643+
if (r < 0) {
644+
log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse ProtectSystem= argument '%s', ignoring: %m", rvalue);
645+
return 0;
646+
}
647+
648+
*v = r;
649+
return 0;
650+
}
651+
613652
static int parse_config_file(void) {
614653
const ConfigTableItem items[] = {
615654
{ "Manager", "LogLevel", config_parse_level2, 0, NULL },
@@ -637,6 +676,7 @@ static int parse_config_file(void) {
637676
{ "Manager", "RuntimeWatchdogPreGovernor", config_parse_string, CONFIG_PARSE_STRING_SAFE, &arg_watchdog_pretimeout_governor },
638677
{ "Manager", "CapabilityBoundingSet", config_parse_capability_set, 0, &arg_capability_bounding_set },
639678
{ "Manager", "NoNewPrivileges", config_parse_bool, 0, &arg_no_new_privs },
679+
{ "Manager", "ProtectSystem", config_parse_protect_system_pid1, 0, &arg_protect_system },
640680
#if HAVE_SECCOMP
641681
{ "Manager", "SystemCallArchitectures", config_parse_syscall_archs, 0, &arg_syscall_archs },
642682
#else
@@ -1684,6 +1724,35 @@ static void initialize_core_pattern(bool skip_setup) {
16841724
arg_early_core_pattern);
16851725
}
16861726

1727+
static void apply_protect_system(bool skip_setup) {
1728+
int r;
1729+
1730+
if (skip_setup || getpid_cached() != 1 || arg_protect_system == 0)
1731+
return;
1732+
1733+
if (arg_protect_system < 0 && !in_initrd()) {
1734+
log_debug("ProtectSystem=auto selected, but not running in an initrd, skipping.");
1735+
return;
1736+
}
1737+
1738+
r = make_mount_point("/usr");
1739+
if (r < 0) {
1740+
log_warning_errno(r, "Failed to make /usr/ a mount point, ignoring: %m");
1741+
return;
1742+
}
1743+
1744+
if (mount_nofollow_verbose(
1745+
LOG_WARNING,
1746+
/* what= */ NULL,
1747+
"/usr",
1748+
/* fstype= */ NULL,
1749+
MS_BIND|MS_REMOUNT|MS_RDONLY,
1750+
/* options= */ NULL) < 0)
1751+
return;
1752+
1753+
log_info("Successfully made /usr/ read-only.");
1754+
}
1755+
16871756
static void update_cpu_affinity(bool skip_setup) {
16881757
_cleanup_free_ char *mask = NULL;
16891758

@@ -2531,6 +2600,7 @@ static void reset_arguments(void) {
25312600

25322601
arg_capability_bounding_set = CAP_MASK_UNSET;
25332602
arg_no_new_privs = false;
2603+
arg_protect_system = -1;
25342604
arg_timer_slack_nsec = NSEC_INFINITY;
25352605

25362606
arg_syscall_archs = set_free(arg_syscall_archs);
@@ -3040,9 +3110,12 @@ int main(int argc, char *argv[]) {
30403110
cmdline_take_random_seed();
30413111
}
30423112

3043-
/* A core pattern might have been specified via the cmdline. */
3113+
/* A core pattern might have been specified via the cmdline. */
30443114
initialize_core_pattern(skip_setup);
30453115

3116+
/* Make /usr/ read-only */
3117+
apply_protect_system(skip_setup);
3118+
30463119
/* Close logging fds, in order not to confuse collecting passed fds and terminal logic below */
30473120
log_close();
30483121

src/core/system.conf.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
#WatchdogDevice=
4040
#CapabilityBoundingSet=
4141
#NoNewPrivileges=no
42+
#ProtectSystem=auto
4243
#SystemCallArchitectures=
4344
#TimerSlackNSec=
4445
#StatusUnitFormat={{STATUS_UNIT_FORMAT_DEFAULT_STR}}

0 commit comments

Comments
 (0)