Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

allow sysctl assignments to fail #13191

Merged
merged 7 commits into from
Jul 26, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
12 changes: 12 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,15 @@ systemd System and Service Manager

CHANGES WITH 243 in spe:

* This release enables unprivileged programs (i.e. requiring neither
setuid nor file capabilities) to send ICMP Echo (i.e. ping) requests
by turning on the net.ipv4.ping_group_range sysctl of the Linux
kernel for the whole UNIX group range, i.e. all processes. This
change should be reasonably safe, as the kernel support for it was
specifically implemented to allow safe access to ICMP Echo for
processes lacking any privileges. If this is not desirable, it can be
disabled again by setting the parameter to "1 0".

* Previously, filters defined with SystemCallFilter= would have the
effect that an calling an offending system call would terminate the
calling thread. This behaviour never made much sense, since killing
Expand Down Expand Up @@ -339,6 +348,9 @@ CHANGES WITH 243 in spe:
* "localectl list-locales" won't list non-UTF-8 locales anymore. It's
2019. (You can set non-UTF-8 locales though, if you know there name.)

* If variable assignments in sysctl.d/ files are prefixed with "-" any
failures to apply them are now ignored.

Contributions from: Aaron Barany, Adrian Bunk, Alan Jenkins, Andrej
Valek, Anita Zhang, Arian van Putten, Balint Reczey, Bastien Nocera,
Ben Boeckel, Benjamin Robin, camoz, Chen Qi, Chris Chiu, Chris Down,
Expand Down
4 changes: 4 additions & 0 deletions man/sysctl.d.xml
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,10 @@
<filename>/proc/sys/net/ipv4/conf/enp3s0.200/forwarding</filename>.
</para>

<para>If a variable assignment is prefixed with a single <literal>-</literal> character, any attempts to
set it that fail will be ignored (though are logged). Moreover, any access permission errors, and
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe:

Any access permission errors and attempts to write variables not defined on the local system are logged, but do not cause the the service to fail. Moreover, if a variable assignment is prefixed with a single "-" character, failure to set the variable will be logged, but will not cause the service to fail. All other errors when setting variables cause the service to return failure at the end (other variables are still processed).

attempts to write variables not defined on the local system are ignored (and logged) too.</para>

<para>The settings configured with <filename>sysctl.d</filename>
files will be applied early on boot. The network
interface-specific options will also be applied individually for
Expand Down
121 changes: 83 additions & 38 deletions src/sysctl/sysctl.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,25 +30,71 @@ static PagerFlags arg_pager_flags = 0;

STATIC_DESTRUCTOR_REGISTER(arg_prefixes, strv_freep);

typedef struct Option {
char *key;
char *value;
bool ignore_failure;
} Option;

static Option *option_free(Option *o) {
if (!o)
return NULL;

free(o->key);
free(o->value);

return mfree(o);
}

DEFINE_TRIVIAL_CLEANUP_FUNC(Option*, option_free);
DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(option_hash_ops, char, string_hash_func, string_compare_func, Option, option_free);

static Option *option_new(
const char *key,
const char *value,
bool ignore_failure) {

_cleanup_(option_freep) Option *o = NULL;

assert(key);
assert(value);

o = new(Option, 1);
if (!o)
return NULL;

*o = (Option) {
.key = strdup(key),
.value = strdup(value),
.ignore_failure = ignore_failure,
};

if (!o->key || !o->value)
return NULL;

return TAKE_PTR(o);
}

static int apply_all(OrderedHashmap *sysctl_options) {
char *property, *value;
Option *option;
Iterator i;
int r = 0;

ORDERED_HASHMAP_FOREACH_KEY(value, property, sysctl_options, i) {
ORDERED_HASHMAP_FOREACH(option, sysctl_options, i) {
int k;

k = sysctl_write(property, value);
k = sysctl_write(option->key, option->value);
if (k < 0) {
/* If the sysctl is not available in the kernel or we are running with reduced privileges and
* cannot write it, then log about the issue at LOG_NOTICE level, and proceed without
* failing. (EROFS is treated as a permission problem here, since that's how container managers
* usually protected their sysctls.) In all other cases log an error and make the tool fail. */

if (IN_SET(k, -EPERM, -EACCES, -EROFS, -ENOENT))
log_notice_errno(k, "Couldn't write '%s' to '%s', ignoring: %m", value, property);
/* If the sysctl is not available in the kernel or we are running with reduced
* privileges and cannot write it, then log about the issue at LOG_NOTICE level, and
* proceed without failing. (EROFS is treated as a permission problem here, since
* that's how container managers usually protected their sysctls.) In all other cases
* log an error and make the tool fail. */

if (IN_SET(k, -EPERM, -EACCES, -EROFS, -ENOENT) || option->ignore_failure)
log_notice_errno(k, "Couldn't write '%s' to '%s', ignoring: %m", option->value, option->key);
else {
log_error_errno(k, "Couldn't write '%s' to '%s': %m", value, property);
log_error_errno(k, "Couldn't write '%s' to '%s': %m", option->value, option->key);
if (r == 0)
r = k;
}
Expand Down Expand Up @@ -94,9 +140,11 @@ static int parse_file(OrderedHashmap *sysctl_options, const char *path, bool ign

log_debug("Parsing %s", path);
for (;;) {
char *p, *value, *new_value, *property, *existing;
_cleanup_(option_freep) Option *new_option = NULL;
_cleanup_free_ char *l = NULL;
void *v;
bool ignore_failure;
Option *existing;
char *p, *value;
int k;

k = read_line(f, LONG_LINE_MAX, &l);
Expand All @@ -116,8 +164,7 @@ static int parse_file(OrderedHashmap *sysctl_options, const char *path, bool ign

value = strchr(p, '=');
if (!value) {
log_error("Line is not an assignment at '%s:%u': %s", path, c, p);

log_syntax(NULL, LOG_WARNING, path, c, 0, "Line is not an assignment, ignoring: %s", p);
if (r == 0)
r = -EINVAL;
continue;
Expand All @@ -126,39 +173,37 @@ static int parse_file(OrderedHashmap *sysctl_options, const char *path, bool ign
*value = 0;
value++;

p = sysctl_normalize(strstrip(p));
p = strstrip(p);
ignore_failure = p[0] == '-';
if (ignore_failure)
p++;

p = sysctl_normalize(p);
value = strstrip(value);

if (!test_prefix(p))
continue;

existing = ordered_hashmap_get2(sysctl_options, p, &v);
existing = ordered_hashmap_get(sysctl_options, p);
if (existing) {
if (streq(value, existing))
if (streq(value, existing->value)) {
existing->ignore_failure = existing->ignore_failure || ignore_failure;
continue;
}

log_debug("Overwriting earlier assignment of %s at '%s:%u'.", p, path, c);
free(ordered_hashmap_remove(sysctl_options, p));
free(v);
option_free(ordered_hashmap_remove(sysctl_options, p));
}

property = strdup(p);
if (!property)
new_option = option_new(p, value, ignore_failure);
if (!new_option)
return log_oom();

new_value = strdup(value);
if (!new_value) {
free(property);
return log_oom();
}
k = ordered_hashmap_put(sysctl_options, new_option->key, new_option);
if (k < 0)
return log_error_errno(k, "Failed to add sysctl variable %s to hashmap: %m", p);

k = ordered_hashmap_put(sysctl_options, property, new_value);
if (k < 0) {
log_error_errno(k, "Failed to add sysctl variable %s to hashmap: %m", property);
free(property);
free(new_value);
return k;
}
TAKE_PTR(new_option);
}

return r;
Expand Down Expand Up @@ -265,7 +310,7 @@ static int parse_argv(int argc, char *argv[]) {
}

static int run(int argc, char *argv[]) {
_cleanup_(ordered_hashmap_free_free_freep) OrderedHashmap *sysctl_options = NULL;
_cleanup_(ordered_hashmap_freep) OrderedHashmap *sysctl_options = NULL;
int r, k;

r = parse_argv(argc, argv);
Expand All @@ -276,15 +321,15 @@ static int run(int argc, char *argv[]) {

umask(0022);

sysctl_options = ordered_hashmap_new(&path_hash_ops);
sysctl_options = ordered_hashmap_new(&option_hash_ops);
if (!sysctl_options)
return log_oom();

r = 0;

if (argc > optind) {
int i;

r = 0;

for (i = optind; i < argc; i++) {
k = parse_file(sysctl_options, argv[i], false);
if (k < 0 && r == 0)
Expand Down
8 changes: 8 additions & 0 deletions sysctl.d/50-default.conf
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,14 @@ net.ipv4.conf.all.accept_source_route = 0
# Promote secondary addresses when the primary address is removed
net.ipv4.conf.all.promote_secondaries = 1

# ping(8) without CAP_NET_ADMIN and CAP_NET_RAW
# The upper limit is set to 2^31-1. Values greater than that get rejected by
# the kernel because of this definition in linux/include/net/ping.h:
# #define GID_T_MAX (((gid_t)~0U) >> 1)
# That's not so bad because values between 2^31 and 2^32-1 are reserved on
# systemd-based systems anyway: https://systemd.io/UIDS-GIDS.html#summary
-net.ipv4.ping_group_range = 0 2147483647
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not compatible with sysctl anymore, and will in particular break sysctl --system.


# Fair Queue CoDel packet scheduler to fight bufferbloat
net.core.default_qdisc = fq_codel

Expand Down