Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add support for KSM #26621

Merged
merged 1 commit into from
Jun 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
24 changes: 24 additions & 0 deletions man/org.freedesktop.systemd1.xml
Original file line number Diff line number Diff line change
Expand Up @@ -3167,6 +3167,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice {
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly b ProtectHostname = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly b MemoryKSM = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly s NetworkNamespacePath = '...';
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly s IPCNamespacePath = '...';
Expand Down Expand Up @@ -3732,6 +3734,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice {

<!--property ProtectHostname is not documented!-->

<!--property MemoryKSM is not documented!-->

<!--property NetworkNamespacePath is not documented!-->

<!--property IPCNamespacePath is not documented!-->
Expand Down Expand Up @@ -4398,6 +4402,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice {

<variablelist class="dbus-property" generated="True" extra-ref="ProtectHostname"/>

<variablelist class="dbus-property" generated="True" extra-ref="MemoryKSM"/>

<variablelist class="dbus-property" generated="True" extra-ref="NetworkNamespacePath"/>

<variablelist class="dbus-property" generated="True" extra-ref="IPCNamespacePath"/>
Expand Down Expand Up @@ -5177,6 +5183,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket {
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly b ProtectHostname = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly b MemoryKSM = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly s NetworkNamespacePath = '...';
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly s IPCNamespacePath = '...';
Expand Down Expand Up @@ -5754,6 +5762,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket {

<!--property ProtectHostname is not documented!-->

<!--property MemoryKSM is not documented!-->

<!--property NetworkNamespacePath is not documented!-->

<!--property IPCNamespacePath is not documented!-->
Expand Down Expand Up @@ -6400,6 +6410,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket {

<variablelist class="dbus-property" generated="True" extra-ref="ProtectHostname"/>

<variablelist class="dbus-property" generated="True" extra-ref="MemoryKSM"/>

<variablelist class="dbus-property" generated="True" extra-ref="NetworkNamespacePath"/>

<variablelist class="dbus-property" generated="True" extra-ref="IPCNamespacePath"/>
Expand Down Expand Up @@ -7054,6 +7066,8 @@ node /org/freedesktop/systemd1/unit/home_2emount {
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly b ProtectHostname = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly b MemoryKSM = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly s NetworkNamespacePath = '...';
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly s IPCNamespacePath = '...';
Expand Down Expand Up @@ -7559,6 +7573,8 @@ node /org/freedesktop/systemd1/unit/home_2emount {

<!--property ProtectHostname is not documented!-->

<!--property MemoryKSM is not documented!-->

<!--property NetworkNamespacePath is not documented!-->

<!--property IPCNamespacePath is not documented!-->
Expand Down Expand Up @@ -8123,6 +8139,8 @@ node /org/freedesktop/systemd1/unit/home_2emount {

<variablelist class="dbus-property" generated="True" extra-ref="ProtectHostname"/>

<variablelist class="dbus-property" generated="True" extra-ref="MemoryKSM"/>

<variablelist class="dbus-property" generated="True" extra-ref="NetworkNamespacePath"/>

<variablelist class="dbus-property" generated="True" extra-ref="IPCNamespacePath"/>
Expand Down Expand Up @@ -8904,6 +8922,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap {
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly b ProtectHostname = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly b MemoryKSM = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly s NetworkNamespacePath = '...';
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly s IPCNamespacePath = '...';
Expand Down Expand Up @@ -9395,6 +9415,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap {

<!--property ProtectHostname is not documented!-->

<!--property MemoryKSM is not documented!-->

<!--property NetworkNamespacePath is not documented!-->

<!--property IPCNamespacePath is not documented!-->
Expand Down Expand Up @@ -9945,6 +9967,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap {

<variablelist class="dbus-property" generated="True" extra-ref="ProtectHostname"/>

<variablelist class="dbus-property" generated="True" extra-ref="MemoryKSM"/>

<variablelist class="dbus-property" generated="True" extra-ref="NetworkNamespacePath"/>

<variablelist class="dbus-property" generated="True" extra-ref="IPCNamespacePath"/>
Expand Down
16 changes: 16 additions & 0 deletions man/systemd.exec.xml
Original file line number Diff line number Diff line change
Expand Up @@ -1773,6 +1773,22 @@ BindReadOnlyPaths=/var/lib/systemd</programlisting>
<xi:include href="system-or-user-ns.xml" xpointer="singular"/></listitem>
</varlistentry>

<varlistentry>
<term><varname>MemoryKSM=</varname></term>

<listitem><para>Takes a boolean argument. If true, KSM (kernel samepage merging) is enabled for the
unit's processes. KSM is a memory-saving de-duplication feature. Anonymous memory pages with identical
content can be replaced by a single write-protected page. This feature should only be enabled for
jobs that share the same security domain. For details, see
<ulink url="https://docs.kernel.org/admin-guide/mm/ksm.html">Kernel Samepage Merging</ulink> in the
kernel documentation.</para>

<para>Note that this functionality might not be available, for example if KSM is disabled in the
kernel, or the kernel doesn't support controlling KSM at the process level through
<function>prctl()</function>.</para>
</listitem>
</varlistentry>

<varlistentry>
<term><varname>PrivateUsers=</varname></term>

Expand Down
1 change: 1 addition & 0 deletions mkosi.kernel.config
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,7 @@ CONFIG_X86_MSR=y
CONFIG_XFRM_USER=y
CONFIG_XFS_FS=y
CONFIG_XFS_POSIX_ACL=y
CONFIG_KSM=y

# CONFIG_WIRELESS is not set
# CONFIG_WLAN is not set
Expand Down
4 changes: 4 additions & 0 deletions src/basic/missing_prctl.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,7 @@
#ifndef PR_MDWE_REFUSE_EXEC_GAIN
#define PR_MDWE_REFUSE_EXEC_GAIN 1
#endif

/* Fallback definition of the prctl() option number for headers that do not define PR_SET_MEMORY_MERGE. */
#ifndef PR_SET_MEMORY_MERGE
#define PR_SET_MEMORY_MERGE 67
#endif
4 changes: 4 additions & 0 deletions src/core/dbus-execute.c
Original file line number Diff line number Diff line change
Expand Up @@ -1346,6 +1346,7 @@ const sd_bus_vtable bus_exec_vtable[] = {
SD_BUS_PROPERTY("ProtectProc", "s", property_get_protect_proc, offsetof(ExecContext, protect_proc), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("ProcSubset", "s", property_get_proc_subset, offsetof(ExecContext, proc_subset), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("ProtectHostname", "b", bus_property_get_bool, offsetof(ExecContext, protect_hostname), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("MemoryKSM", "b", bus_property_get_tristate, offsetof(ExecContext, memory_ksm), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("NetworkNamespacePath", "s", NULL, offsetof(ExecContext, network_namespace_path), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("IPCNamespacePath", "s", NULL, offsetof(ExecContext, ipc_namespace_path), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("RootImagePolicy", "s", property_get_image_policy, offsetof(ExecContext, root_image_policy), SD_BUS_VTABLE_PROPERTY_CONST),
Expand Down Expand Up @@ -2023,6 +2024,9 @@ int bus_exec_context_set_transient_property(
if (streq(name, "ProtectHostname"))
return bus_set_transient_bool(u, name, &c->protect_hostname, message, flags, error);

if (streq(name, "MemoryKSM"))
return bus_set_transient_tristate(u, name, &c->memory_ksm, message, flags, error);

if (streq(name, "UtmpIdentifier"))
return bus_set_transient_string(u, name, &c->utmp_id, message, flags, error);

Expand Down
11 changes: 11 additions & 0 deletions src/core/execute.c
Original file line number Diff line number Diff line change
Expand Up @@ -5193,6 +5193,16 @@ static int exec_child(
return r;
}

/* Apply MemoryKSM= only if it was configured explicitly; a negative value means "unset" and no
 * prctl() call is made. PR_SET_MEMORY_MERGE uses a single argument, and the kernel rejects the call
 * with EINVAL unless the remaining prctl() arguments are zero. Since prctl() is variadic, pass the
 * zeros explicitly instead of leaving them indeterminate. */
if (context->memory_ksm >= 0)
        if (prctl(PR_SET_MEMORY_MERGE, context->memory_ksm, 0, 0, 0) < 0) {
                if (ERRNO_IS_NOT_SUPPORTED(errno))
                        /* Lack of KSM support is not fatal: log at debug level and continue. */
                        log_unit_debug_errno(unit, errno, "KSM support not available, ignoring.");
                else {
                        /* Any other failure aborts the child setup with a dedicated exit status. */
                        *exit_status = EXIT_KSM;
                        return log_unit_error_errno(unit, errno, "Failed to set KSM: %m");
                }
        }

/* Drop groups as early as possible.
* This needs to be done after PrivateDevices=y setup as device nodes should be owned by the host's root.
* For non-root in a userns, devices will be owned by the user/group before the group change, and nobody. */
Expand Down Expand Up @@ -5773,6 +5783,7 @@ void exec_context_init(ExecContext *c) {
c->tty_cols = UINT_MAX;
numa_policy_reset(&c->numa_policy);
c->private_mounts = -1;
c->memory_ksm = -1;
}

void exec_context_done(ExecContext *c) {
Expand Down
1 change: 1 addition & 0 deletions src/core/execute.h
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,7 @@ struct ExecContext {
ProcSubset proc_subset; /* subset= */

int private_mounts;
int memory_ksm;
keszybz marked this conversation as resolved.
Show resolved Hide resolved
bool private_tmp;
bool private_network;
bool private_devices;
Expand Down
1 change: 1 addition & 0 deletions src/core/load-fragment-gperf.gperf.in
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,7 @@
{{type}}.SmackProcessLabel, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
{% endif %}
{{type}}.ProtectHostname, config_parse_bool, 0, offsetof({{type}}, exec_context.protect_hostname)
{{type}}.MemoryKSM, config_parse_tristate, 0, offsetof({{type}}, exec_context.memory_ksm)
{%- endmacro -%}

{%- macro KILL_CONTEXT_CONFIG_ITEMS(type) -%}
Expand Down
1 change: 1 addition & 0 deletions src/shared/bus-unit-util.c
Original file line number Diff line number Diff line change
Expand Up @@ -991,6 +991,7 @@ static int bus_append_execute_property(sd_bus_message *m, const char *field, con
"CPUSchedulingResetOnFork",
"LockPersonality",
"ProtectHostname",
"MemoryKSM",
"RestrictSUIDSGID"))
return bus_append_parse_boolean(m, field, eq);

Expand Down
1 change: 1 addition & 0 deletions src/shared/exit-status.c
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ const ExitStatusMapping exit_status_mappings[256] = {
[EXIT_NUMA_POLICY] = { "NUMA_POLICY", EXIT_STATUS_SYSTEMD },
[EXIT_CREDENTIALS] = { "CREDENTIALS", EXIT_STATUS_SYSTEMD },
[EXIT_BPF] = { "BPF", EXIT_STATUS_SYSTEMD },
[EXIT_KSM] = { "KSM", EXIT_STATUS_SYSTEMD },

[EXIT_EXCEPTION] = { "EXCEPTION", EXIT_STATUS_SYSTEMD },

Expand Down
1 change: 1 addition & 0 deletions src/shared/exit-status.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ enum {
EXIT_NUMA_POLICY,
EXIT_CREDENTIALS,
EXIT_BPF,
EXIT_KSM,
YHNdnzj marked this conversation as resolved.
Show resolved Hide resolved

EXIT_EXCEPTION = 255, /* Whenever we want to propagate an abnormal/signal exit, in line with bash */
};
Expand Down