Skip to content
Permalink
Browse files

bpf-firewall: custom BPF programs through IP(Ingress|Egress)FilterPath=

Takes a single /sys/fs/bpf/pinned_prog string as argument, but may be
specified multiple times. An empty assignment resets all previous filters.

Closes #10227
  • Loading branch information
pothos authored and poettering committed Apr 23, 2019
1 parent 2d901d3 commit fab347489fcfafbc8367c86afc637ce1b81ae59e
@@ -618,6 +618,39 @@
</listitem>
</varlistentry>

<varlistentry>
<term><varname>IPIngressFilterPath=<replaceable>BPF_FS_PROGRAMM_PATH</replaceable></varname></term>
<term><varname>IPEgressFilterPath=<replaceable>BPF_FS_PROGRAMM_PATH</replaceable></varname></term>

<listitem>
<para>Add custom network traffic filters implemented as BPF programs, applying to all IP packets
sent and received over <constant>AF_INET</constant> and <constant>AF_INET6</constant> sockets.
Takes an absolute path to a pinned BPF program in the BPF virtual filesystem (<filename>/sys/fs/bpf/</filename>).
</para>

<para>The filters configured with this option are applied to all sockets created by processes
of this unit (or in the case of socket units, associated with it). The filters are loaded in addition
to filters any of the parent slice units this unit might be a member of as well as any
<varname>IPAddressAllow=</varname> and <varname>IPAddressDeny=</varname> filters in any of these units.
By default there are no filters specified.</para>

<para>If these settings are used multiple times in the same unit all the specified programs are attached. If an
empty string is assigned to these settings the program list is reset and all previous specified programs ignored.</para>

<para>Note that for socket-activated services, the IP filter programs configured on the socket unit apply to
all sockets associated with it directly, but not to any sockets created by the ultimately activated services
for it. Conversely, the IP filter programs configured for the service are not applied to any sockets passed into
the service via socket activation. Thus, it is usually a good idea, to replicate the IP filter programs on both
the socket and the service unit, however it often makes sense to maintain one configuration more open and the other
one more restricted, depending on the usecase.</para>

<para>Note that these settings might not be supported on some systems (for example if eBPF control group
support is not enabled in the underlying kernel or container manager). These settings will fail the service in
that case. If compatibility with such systems is desired it is hence recommended to attach your filter manually
(requires <varname>Delegate=</varname><constant>yes</constant>) instead of using this setting.</para>
</listitem>
</varlistentry>

<varlistentry>
<term><varname>DeviceAllow=</varname></term>

@@ -45,6 +45,9 @@ struct security_info {
bool ip_address_allow_localhost;
bool ip_address_allow_other;

bool ip_filters_custom_ingress;
bool ip_filters_custom_egress;

char *keyring_mode;
bool lock_personality;
bool memory_deny_write_execute;
@@ -590,7 +593,10 @@ static int assess_ip_address_allow(
assert(ret_badness);
assert(ret_description);

if (!info->ip_address_deny_all) {
if (info->ip_filters_custom_ingress || info->ip_filters_custom_egress) {
d = strdup("Service defines custom ingress/egress IP filters with BPF programs");
b = 0;
} else if (!info->ip_address_deny_all) {
d = strdup("Service does not define an IP address whitelist");
b = 10;
} else if (info->ip_address_allow_other) {
@@ -1824,6 +1830,33 @@ static int property_read_ip_address_allow(
return sd_bus_message_exit_container(m);
}

static int property_read_ip_filters(
sd_bus *bus,
const char *member,
sd_bus_message *m,
sd_bus_error *error,
void *userdata) {

struct security_info *info = userdata;
_cleanup_(strv_freep) char **l = NULL;
int r;

assert(bus);
assert(member);
assert(m);

r = sd_bus_message_read_strv(m, &l);
if (r < 0)
return r;

if (streq(member, "IPIngressFilterPath"))
info->ip_filters_custom_ingress = !strv_isempty(l);
else if (streq(member, "IPEgressFilterPath"))
info->ip_filters_custom_ingress = !strv_isempty(l);

return 0;
}

static int property_read_device_allow(
sd_bus *bus,
const char *member,
@@ -1873,6 +1906,8 @@ static int acquire_security_info(sd_bus *bus, const char *name, struct security_
{ "FragmentPath", "s", NULL, offsetof(struct security_info, fragment_path) },
{ "IPAddressAllow", "a(iayu)", property_read_ip_address_allow, 0 },
{ "IPAddressDeny", "a(iayu)", property_read_ip_address_allow, 0 },
{ "IPIngressFilterPath", "as", property_read_ip_filters, 0 },
{ "IPEgressFilterPath", "as", property_read_ip_filters, 0 },
{ "Id", "s", NULL, offsetof(struct security_info, id) },
{ "KeyringMode", "s", NULL, offsetof(struct security_info, keyring_mode) },
{ "LoadState", "s", NULL, offsetof(struct security_info, load_state) },
@@ -587,6 +587,95 @@ int bpf_firewall_compile(Unit *u) {
return 0;
}

DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(filter_prog_hash_ops, void, trivial_hash_func, trivial_compare_func, BPFProgram, bpf_program_unref);

static int load_bpf_progs_from_fs_to_set(Unit *u, char **filter_paths, Set **set) {
char **bpf_fs_path;

set_clear(*set);

STRV_FOREACH(bpf_fs_path, filter_paths) {
_cleanup_free_ BPFProgram *prog = NULL;
int r;

r = bpf_program_new(BPF_PROG_TYPE_CGROUP_SKB, &prog);
if (r < 0)
return log_unit_error_errno(u, r, "Can't allocate CGROUP SKB BPF program: %m");

r = bpf_program_load_from_bpf_fs(prog, *bpf_fs_path);
if (r < 0)
return log_unit_error_errno(u, r, "Loading of ingress BPF program %s failed: %m", *bpf_fs_path);

r = set_ensure_allocated(set, &filter_prog_hash_ops);
if (r < 0)
return log_unit_error_errno(u, r, "Can't allocate BPF program set: %m");

r = set_put(*set, prog);
if (r < 0)
return log_unit_error_errno(u, r, "Can't add program to BPF program set: %m");
TAKE_PTR(prog);
}

return 0;
}

int bpf_firewall_load_custom(Unit *u) {
CGroupContext *cc;
int r, supported;

assert(u);

cc = unit_get_cgroup_context(u);
if (!cc)
return 0;

if (!(cc->ip_filters_ingress || cc->ip_filters_egress))
return 0;

supported = bpf_firewall_supported();
if (supported < 0)
return supported;

if (supported != BPF_FIREWALL_SUPPORTED_WITH_MULTI)
return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EOPNOTSUPP), "BPF_F_ALLOW_MULTI not supported on this manager, cannot attach custom BPF programs.");

r = load_bpf_progs_from_fs_to_set(u, cc->ip_filters_ingress, &u->ip_bpf_custom_ingress);
if (r < 0)
return r;
r = load_bpf_progs_from_fs_to_set(u, cc->ip_filters_egress, &u->ip_bpf_custom_egress);
if (r < 0)
return r;

return 0;
}

static int attach_custom_bpf_progs(Unit *u, const char *path, int attach_type, Set **set, Set **set_installed) {
BPFProgram *prog;
Iterator i;
int r;

assert(u);

set_clear(*set_installed);

SET_FOREACH(prog, *set, i) {
r = bpf_program_cgroup_attach(prog, attach_type, path, BPF_F_ALLOW_MULTI);
if (r < 0)
return log_unit_error_errno(u, r, "Attaching custom egress BPF program to cgroup %s failed: %m", path);
/* Remember that these BPF programs are installed now. */
r = set_ensure_allocated(set_installed, &filter_prog_hash_ops);
if (r < 0)
return log_unit_error_errno(u, r, "Can't allocate BPF program set: %m");

r = set_put(*set_installed, prog);
if (r < 0)
return log_unit_error_errno(u, r, "Can't add program to BPF program set: %m");
bpf_program_ref(prog);
}

return 0;
}

int bpf_firewall_install(Unit *u) {
_cleanup_free_ char *path = NULL;
CGroupContext *cc;
@@ -614,6 +703,9 @@ int bpf_firewall_install(Unit *u) {
log_unit_debug(u, "BPF_F_ALLOW_MULTI is not supported on this manager, not doing BPF firewall on slice units.");
return -EOPNOTSUPP;
}
if (supported != BPF_FIREWALL_SUPPORTED_WITH_MULTI &&
(!set_isempty(u->ip_bpf_custom_ingress) || !set_isempty(u->ip_bpf_custom_egress)))
return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EOPNOTSUPP), "BPF_F_ALLOW_MULTI not supported on this manager, cannot attach custom BPF programs.");

r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, NULL, &path);
if (r < 0)
@@ -628,7 +720,8 @@ int bpf_firewall_install(Unit *u) {
u->ip_bpf_ingress_installed = bpf_program_unref(u->ip_bpf_ingress_installed);

if (u->ip_bpf_egress) {
r = bpf_program_cgroup_attach(u->ip_bpf_egress, BPF_CGROUP_INET_EGRESS, path, flags);
r = bpf_program_cgroup_attach(u->ip_bpf_egress, BPF_CGROUP_INET_EGRESS, path,
flags | (set_isempty(u->ip_bpf_custom_egress) ? 0 : BPF_F_ALLOW_MULTI));
if (r < 0)
return log_unit_error_errno(u, r, "Attaching egress BPF program to cgroup %s failed: %m", path);

@@ -637,13 +730,22 @@ int bpf_firewall_install(Unit *u) {
}

if (u->ip_bpf_ingress) {
r = bpf_program_cgroup_attach(u->ip_bpf_ingress, BPF_CGROUP_INET_INGRESS, path, flags);
r = bpf_program_cgroup_attach(u->ip_bpf_ingress, BPF_CGROUP_INET_INGRESS, path,
flags | (set_isempty(u->ip_bpf_custom_ingress) ? 0 : BPF_F_ALLOW_MULTI));
if (r < 0)
return log_unit_error_errno(u, r, "Attaching ingress BPF program to cgroup %s failed: %m", path);

u->ip_bpf_ingress_installed = bpf_program_ref(u->ip_bpf_ingress);
}

r = attach_custom_bpf_progs(u, path, BPF_CGROUP_INET_EGRESS, &u->ip_bpf_custom_egress, &u->ip_bpf_custom_egress_installed);
if (r < 0)
return r;

r = attach_custom_bpf_progs(u, path, BPF_CGROUP_INET_INGRESS, &u->ip_bpf_custom_ingress, &u->ip_bpf_custom_ingress_installed);
if (r < 0)
return r;

return 0;
}

@@ -15,6 +15,7 @@ int bpf_firewall_supported(void);

int bpf_firewall_compile(Unit *u);
int bpf_firewall_install(Unit *u);
int bpf_firewall_load_custom(Unit *u);

int bpf_firewall_read_accounting(int map_fd, uint64_t *ret_bytes, uint64_t *ret_packets);
int bpf_firewall_reset_accounting(int map_fd);
@@ -199,6 +199,9 @@ void cgroup_context_done(CGroupContext *c) {

c->ip_address_allow = ip_address_access_free_all(c->ip_address_allow);
c->ip_address_deny = ip_address_access_free_all(c->ip_address_deny);

c->ip_filters_ingress = strv_free(c->ip_filters_ingress);
c->ip_filters_egress = strv_free(c->ip_filters_egress);
}

void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
@@ -210,6 +213,7 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
CGroupBlockIODeviceWeight *w;
CGroupDeviceAllow *a;
IPAddressAccessItem *iaai;
char **path;
char u[FORMAT_TIMESPAN_MAX];
char v[FORMAT_TIMESPAN_MAX];

@@ -360,6 +364,12 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
(void) in_addr_to_string(iaai->family, &iaai->address, &k);
fprintf(f, "%sIPAddressDeny=%s/%u\n", prefix, strnull(k), iaai->prefixlen);
}

STRV_FOREACH(path, c->ip_filters_ingress)
fprintf(f, "%sIPIngressFilterPath=%s\n", prefix, *path);

STRV_FOREACH(path, c->ip_filters_egress)
fprintf(f, "%sIPEgressFilterPath=%s\n", prefix, *path);
}

int cgroup_add_device_allow(CGroupContext *c, const char *dev, const char *mode) {
@@ -945,6 +955,7 @@ static void cgroup_apply_firewall(Unit *u) {
if (bpf_firewall_compile(u) < 0)
return;

(void) bpf_firewall_load_custom(u);
(void) bpf_firewall_install(u);
}

@@ -1353,7 +1364,9 @@ static bool unit_get_needs_bpf_firewall(Unit *u) {

if (c->ip_accounting ||
c->ip_address_allow ||
c->ip_address_deny)
c->ip_address_deny ||
c->ip_filters_ingress ||
c->ip_filters_egress)
return true;

/* If any parent slice has an IP access list defined, it applies too */
@@ -1919,6 +1932,12 @@ int unit_attach_pids_to_cgroup(Unit *u, Set *pids, const char *suffix_path) {
if (set_isempty(pids))
return 0;

/* Load any custom firewall BPF programs here once to test if they are existing and actually loadable.
* Fail here early since later errors in the call chain unit_realize_cgroup to cgroup_context_apply are ignored. */
r = bpf_firewall_load_custom(u);
if (r < 0)
return r;

r = unit_realize_cgroup(u);
if (r < 0)
return r;
@@ -114,6 +114,9 @@ struct CGroupContext {
LIST_HEAD(IPAddressAccessItem, ip_address_allow);
LIST_HEAD(IPAddressAccessItem, ip_address_deny);

char **ip_filters_ingress;
char **ip_filters_egress;

/* For legacy hierarchies */
uint64_t cpu_shares;
uint64_t startup_cpu_shares;

0 comments on commit fab3474

Please sign in to comment.
You can’t perform that action at this time.