Skip to content

Commit

Permalink
bpf-firewall: custom BPF programs through IP(Ingress|Egress)FilterPath=
Browse files Browse the repository at this point in the history
Takes a single /sys/fs/bpf/pinned_prog string as argument, but may be
specified multiple times. An empty assignment resets all previous filters.

Closes #10227
  • Loading branch information
pothos authored and poettering committed Jun 25, 2019
1 parent 2d901d3 commit fab3474
Show file tree
Hide file tree
Showing 16 changed files with 436 additions and 7 deletions.
33 changes: 33 additions & 0 deletions man/systemd.resource-control.xml
Expand Up @@ -618,6 +618,39 @@
</listitem> </listitem>
</varlistentry> </varlistentry>


<varlistentry>
<term><varname>IPIngressFilterPath=<replaceable>BPF_FS_PROGRAM_PATH</replaceable></varname></term>
<term><varname>IPEgressFilterPath=<replaceable>BPF_FS_PROGRAM_PATH</replaceable></varname></term>

<listitem>
<para>Add custom network traffic filters implemented as BPF programs, applying to all IP packets
sent and received over <constant>AF_INET</constant> and <constant>AF_INET6</constant> sockets.
Takes an absolute path to a pinned BPF program in the BPF virtual filesystem (<filename>/sys/fs/bpf/</filename>).
</para>

<para>The filters configured with this option are applied to all sockets created by processes
of this unit (or in the case of socket units, associated with it). The filters are loaded in addition
to the filters of any parent slice units this unit might be a member of, as well as any
<varname>IPAddressAllow=</varname> and <varname>IPAddressDeny=</varname> filters in any of these units.
By default there are no filters specified.</para>

<para>If these settings are used multiple times in the same unit, all the specified programs are attached. If an
empty string is assigned to these settings, the program list is reset and all previously specified programs are ignored.</para>

<para>Note that for socket-activated services, the IP filter programs configured on the socket unit apply to
all sockets associated with it directly, but not to any sockets created by the ultimately activated services
for it. Conversely, the IP filter programs configured for the service are not applied to any sockets passed into
the service via socket activation. Thus, it is usually a good idea to replicate the IP filter programs on both
the socket and the service unit; however, it often makes sense to maintain one configuration more open and the other
one more restricted, depending on the use case.</para>

<para>Note that these settings might not be supported on some systems (for example if eBPF control group
support is not enabled in the underlying kernel or container manager). These settings will fail the service in
that case. If compatibility with such systems is desired it is hence recommended to attach your filter manually
(requires <varname>Delegate=</varname><constant>yes</constant>) instead of using this setting.</para>
</listitem>
</varlistentry>

<varlistentry> <varlistentry>
<term><varname>DeviceAllow=</varname></term> <term><varname>DeviceAllow=</varname></term>


Expand Down
37 changes: 36 additions & 1 deletion src/analyze/analyze-security.c
Expand Up @@ -45,6 +45,9 @@ struct security_info {
bool ip_address_allow_localhost; bool ip_address_allow_localhost;
bool ip_address_allow_other; bool ip_address_allow_other;


bool ip_filters_custom_ingress;
bool ip_filters_custom_egress;

char *keyring_mode; char *keyring_mode;
bool lock_personality; bool lock_personality;
bool memory_deny_write_execute; bool memory_deny_write_execute;
Expand Down Expand Up @@ -590,7 +593,10 @@ static int assess_ip_address_allow(
assert(ret_badness); assert(ret_badness);
assert(ret_description); assert(ret_description);


if (!info->ip_address_deny_all) { if (info->ip_filters_custom_ingress || info->ip_filters_custom_egress) {
d = strdup("Service defines custom ingress/egress IP filters with BPF programs");
b = 0;
} else if (!info->ip_address_deny_all) {
d = strdup("Service does not define an IP address whitelist"); d = strdup("Service does not define an IP address whitelist");
b = 10; b = 10;
} else if (info->ip_address_allow_other) { } else if (info->ip_address_allow_other) {
Expand Down Expand Up @@ -1824,6 +1830,33 @@ static int property_read_ip_address_allow(
return sd_bus_message_exit_container(m); return sd_bus_message_exit_container(m);
} }


/* Bus property parser for "IPIngressFilterPath" and "IPEgressFilterPath".
 *
 * Reads the string array carried by the message and records, in the struct security_info passed as
 * userdata, whether at least one custom filter path is configured for the respective direction.
 *
 * Returns 0 on success, or the negative error returned by sd_bus_message_read_strv(). Members with any
 * other name are read and ignored. */
static int property_read_ip_filters(
                sd_bus *bus,
                const char *member,
                sd_bus_message *m,
                sd_bus_error *error,
                void *userdata) {

        struct security_info *info = userdata;
        _cleanup_(strv_freep) char **l = NULL;
        int r;

        assert(bus);
        assert(member);
        assert(m);

        r = sd_bus_message_read_strv(m, &l);
        if (r < 0)
                return r;

        if (streq(member, "IPIngressFilterPath"))
                info->ip_filters_custom_ingress = !strv_isempty(l);
        else if (streq(member, "IPEgressFilterPath"))
                /* The egress property has its own flag; it must not overwrite the ingress one. */
                info->ip_filters_custom_egress = !strv_isempty(l);

        return 0;
}

static int property_read_device_allow( static int property_read_device_allow(
sd_bus *bus, sd_bus *bus,
const char *member, const char *member,
Expand Down Expand Up @@ -1873,6 +1906,8 @@ static int acquire_security_info(sd_bus *bus, const char *name, struct security_
{ "FragmentPath", "s", NULL, offsetof(struct security_info, fragment_path) }, { "FragmentPath", "s", NULL, offsetof(struct security_info, fragment_path) },
{ "IPAddressAllow", "a(iayu)", property_read_ip_address_allow, 0 }, { "IPAddressAllow", "a(iayu)", property_read_ip_address_allow, 0 },
{ "IPAddressDeny", "a(iayu)", property_read_ip_address_allow, 0 }, { "IPAddressDeny", "a(iayu)", property_read_ip_address_allow, 0 },
{ "IPIngressFilterPath", "as", property_read_ip_filters, 0 },
{ "IPEgressFilterPath", "as", property_read_ip_filters, 0 },
{ "Id", "s", NULL, offsetof(struct security_info, id) }, { "Id", "s", NULL, offsetof(struct security_info, id) },
{ "KeyringMode", "s", NULL, offsetof(struct security_info, keyring_mode) }, { "KeyringMode", "s", NULL, offsetof(struct security_info, keyring_mode) },
{ "LoadState", "s", NULL, offsetof(struct security_info, load_state) }, { "LoadState", "s", NULL, offsetof(struct security_info, load_state) },
Expand Down
106 changes: 104 additions & 2 deletions src/core/bpf-firewall.c
Expand Up @@ -587,6 +587,95 @@ int bpf_firewall_compile(Unit *u) {
return 0; return 0;
} }


/* Hash operations used for the sets of custom BPF programs allocated further down in this file.
 * Entries use trivial_hash_func/trivial_compare_func; judging by the macro name, bpf_program_unref() is
 * registered as the destructor for the stored BPFProgram objects.
 * NOTE(review): confirm the destructor semantics against the macro definition. */
DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(filter_prog_hash_ops, void, trivial_hash_func, trivial_compare_func, BPFProgram, bpf_program_unref);

/* Loads every pinned BPF program named in filter_paths and collects the resulting program objects in *set.
 *
 * u            unit used as the logging context
 * filter_paths string vector of paths handed to bpf_program_load_from_bpf_fs()
 * set          in/out: cleared first, allocated on demand, then filled with one program per path
 *
 * Returns 0 on success or a negative errno-style value (already logged) on the first failure. On failure
 * the set keeps whatever programs were added before the failing entry. */
static int load_bpf_progs_from_fs_to_set(Unit *u, char **filter_paths, Set **set) {
        char **bpf_fs_path;

        assert(u);
        assert(set);

        set_clear(*set);

        STRV_FOREACH(bpf_fs_path, filter_paths) {
                /* Drop the program through its unref function rather than a plain free(), so that anything it
                 * acquired while being loaded is released too if we bail out below. */
                _cleanup_(bpf_program_unrefp) BPFProgram *prog = NULL;
                int r;

                r = bpf_program_new(BPF_PROG_TYPE_CGROUP_SKB, &prog);
                if (r < 0)
                        return log_unit_error_errno(u, r, "Can't allocate CGROUP SKB BPF program: %m");

                /* This helper serves both the ingress and the egress list, hence no direction in the message. */
                r = bpf_program_load_from_bpf_fs(prog, *bpf_fs_path);
                if (r < 0)
                        return log_unit_error_errno(u, r, "Loading of BPF program %s failed: %m", *bpf_fs_path);

                r = set_ensure_allocated(set, &filter_prog_hash_ops);
                if (r < 0)
                        return log_unit_error_errno(u, r, "Can't allocate BPF program set: %m");

                r = set_put(*set, prog);
                if (r < 0)
                        return log_unit_error_errno(u, r, "Can't add program to BPF program set: %m");

                /* The set holds the reference now; disarm the cleanup handler. */
                TAKE_PTR(prog);
        }

        return 0;
}

/* Loads the custom ingress and egress BPF programs configured in the unit's cgroup context into
 * u->ip_bpf_custom_ingress and u->ip_bpf_custom_egress.
 *
 * Returns 0 if the unit has no cgroup context, if neither filter list is set, or if both lists were
 * loaded; otherwise a negative errno-style value: the error from bpf_firewall_supported(), -EOPNOTSUPP
 * when the support level is anything other than BPF_FIREWALL_SUPPORTED_WITH_MULTI, or the error from
 * loading one of the lists. */
int bpf_firewall_load_custom(Unit *u) {
        CGroupContext *context;
        int level, ret;

        assert(u);

        context = unit_get_cgroup_context(u);
        if (!context)
                return 0;

        /* Nothing to do unless at least one of the two filter lists is set. */
        if (!context->ip_filters_ingress && !context->ip_filters_egress)
                return 0;

        level = bpf_firewall_supported();
        if (level < 0)
                return level;
        if (level != BPF_FIREWALL_SUPPORTED_WITH_MULTI)
                return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EOPNOTSUPP), "BPF_F_ALLOW_MULTI not supported on this manager, cannot attach custom BPF programs.");

        /* Ingress first, then egress; stop at the first list that fails to load. */
        ret = load_bpf_progs_from_fs_to_set(u, context->ip_filters_ingress, &u->ip_bpf_custom_ingress);
        if (ret >= 0)
                ret = load_bpf_progs_from_fs_to_set(u, context->ip_filters_egress, &u->ip_bpf_custom_egress);

        return ret < 0 ? ret : 0;
}

/* Attaches every program in *set to the cgroup at path and records the attached ones in *set_installed.
 *
 * u             unit used as the logging context
 * path          cgroup path passed to bpf_program_cgroup_attach()
 * attach_type   attach type passed through to bpf_program_cgroup_attach()
 * set           programs to attach
 * set_installed in/out: cleared first, allocated on demand; receives an extra reference to each program
 *               that was attached
 *
 * Returns 0 on success or a negative errno-style value (already logged) on the first failure. */
static int attach_custom_bpf_progs(Unit *u, const char *path, int attach_type, Set **set, Set **set_installed) {
        BPFProgram *prog;
        Iterator i;
        int r;

        assert(u);
        assert(path);
        assert(set);
        assert(set_installed);

        set_clear(*set_installed);

        SET_FOREACH(prog, *set, i) {
                /* This helper is called for both directions, so the message must not claim a specific one. */
                r = bpf_program_cgroup_attach(prog, attach_type, path, BPF_F_ALLOW_MULTI);
                if (r < 0)
                        return log_unit_error_errno(u, r, "Attaching custom BPF program to cgroup %s failed: %m", path);

                /* Remember that these BPF programs are installed now. */
                r = set_ensure_allocated(set_installed, &filter_prog_hash_ops);
                if (r < 0)
                        return log_unit_error_errno(u, r, "Can't allocate BPF program set: %m");

                r = set_put(*set_installed, prog);
                if (r < 0)
                        return log_unit_error_errno(u, r, "Can't add program to BPF program set: %m");

                /* The installed set shares the program with *set, so it needs its own reference. */
                bpf_program_ref(prog);
        }

        return 0;
}

int bpf_firewall_install(Unit *u) { int bpf_firewall_install(Unit *u) {
_cleanup_free_ char *path = NULL; _cleanup_free_ char *path = NULL;
CGroupContext *cc; CGroupContext *cc;
Expand Down Expand Up @@ -614,6 +703,9 @@ int bpf_firewall_install(Unit *u) {
log_unit_debug(u, "BPF_F_ALLOW_MULTI is not supported on this manager, not doing BPF firewall on slice units."); log_unit_debug(u, "BPF_F_ALLOW_MULTI is not supported on this manager, not doing BPF firewall on slice units.");
return -EOPNOTSUPP; return -EOPNOTSUPP;
} }
if (supported != BPF_FIREWALL_SUPPORTED_WITH_MULTI &&
(!set_isempty(u->ip_bpf_custom_ingress) || !set_isempty(u->ip_bpf_custom_egress)))
return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EOPNOTSUPP), "BPF_F_ALLOW_MULTI not supported on this manager, cannot attach custom BPF programs.");


r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, NULL, &path); r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, NULL, &path);
if (r < 0) if (r < 0)
Expand All @@ -628,7 +720,8 @@ int bpf_firewall_install(Unit *u) {
u->ip_bpf_ingress_installed = bpf_program_unref(u->ip_bpf_ingress_installed); u->ip_bpf_ingress_installed = bpf_program_unref(u->ip_bpf_ingress_installed);


if (u->ip_bpf_egress) { if (u->ip_bpf_egress) {
r = bpf_program_cgroup_attach(u->ip_bpf_egress, BPF_CGROUP_INET_EGRESS, path, flags); r = bpf_program_cgroup_attach(u->ip_bpf_egress, BPF_CGROUP_INET_EGRESS, path,
flags | (set_isempty(u->ip_bpf_custom_egress) ? 0 : BPF_F_ALLOW_MULTI));
if (r < 0) if (r < 0)
return log_unit_error_errno(u, r, "Attaching egress BPF program to cgroup %s failed: %m", path); return log_unit_error_errno(u, r, "Attaching egress BPF program to cgroup %s failed: %m", path);


Expand All @@ -637,13 +730,22 @@ int bpf_firewall_install(Unit *u) {
} }


if (u->ip_bpf_ingress) { if (u->ip_bpf_ingress) {
r = bpf_program_cgroup_attach(u->ip_bpf_ingress, BPF_CGROUP_INET_INGRESS, path, flags); r = bpf_program_cgroup_attach(u->ip_bpf_ingress, BPF_CGROUP_INET_INGRESS, path,
flags | (set_isempty(u->ip_bpf_custom_ingress) ? 0 : BPF_F_ALLOW_MULTI));
if (r < 0) if (r < 0)
return log_unit_error_errno(u, r, "Attaching ingress BPF program to cgroup %s failed: %m", path); return log_unit_error_errno(u, r, "Attaching ingress BPF program to cgroup %s failed: %m", path);


u->ip_bpf_ingress_installed = bpf_program_ref(u->ip_bpf_ingress); u->ip_bpf_ingress_installed = bpf_program_ref(u->ip_bpf_ingress);
} }


r = attach_custom_bpf_progs(u, path, BPF_CGROUP_INET_EGRESS, &u->ip_bpf_custom_egress, &u->ip_bpf_custom_egress_installed);
if (r < 0)
return r;

r = attach_custom_bpf_progs(u, path, BPF_CGROUP_INET_INGRESS, &u->ip_bpf_custom_ingress, &u->ip_bpf_custom_ingress_installed);
if (r < 0)
return r;

return 0; return 0;
} }


Expand Down
1 change: 1 addition & 0 deletions src/core/bpf-firewall.h
Expand Up @@ -15,6 +15,7 @@ int bpf_firewall_supported(void);


int bpf_firewall_compile(Unit *u); int bpf_firewall_compile(Unit *u);
int bpf_firewall_install(Unit *u); int bpf_firewall_install(Unit *u);
int bpf_firewall_load_custom(Unit *u);


int bpf_firewall_read_accounting(int map_fd, uint64_t *ret_bytes, uint64_t *ret_packets); int bpf_firewall_read_accounting(int map_fd, uint64_t *ret_bytes, uint64_t *ret_packets);
int bpf_firewall_reset_accounting(int map_fd); int bpf_firewall_reset_accounting(int map_fd);
Expand Down
21 changes: 20 additions & 1 deletion src/core/cgroup.c
Expand Up @@ -199,6 +199,9 @@ void cgroup_context_done(CGroupContext *c) {


c->ip_address_allow = ip_address_access_free_all(c->ip_address_allow); c->ip_address_allow = ip_address_access_free_all(c->ip_address_allow);
c->ip_address_deny = ip_address_access_free_all(c->ip_address_deny); c->ip_address_deny = ip_address_access_free_all(c->ip_address_deny);

c->ip_filters_ingress = strv_free(c->ip_filters_ingress);
c->ip_filters_egress = strv_free(c->ip_filters_egress);
} }


void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) { void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
Expand All @@ -210,6 +213,7 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
CGroupBlockIODeviceWeight *w; CGroupBlockIODeviceWeight *w;
CGroupDeviceAllow *a; CGroupDeviceAllow *a;
IPAddressAccessItem *iaai; IPAddressAccessItem *iaai;
char **path;
char u[FORMAT_TIMESPAN_MAX]; char u[FORMAT_TIMESPAN_MAX];
char v[FORMAT_TIMESPAN_MAX]; char v[FORMAT_TIMESPAN_MAX];


Expand Down Expand Up @@ -360,6 +364,12 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
(void) in_addr_to_string(iaai->family, &iaai->address, &k); (void) in_addr_to_string(iaai->family, &iaai->address, &k);
fprintf(f, "%sIPAddressDeny=%s/%u\n", prefix, strnull(k), iaai->prefixlen); fprintf(f, "%sIPAddressDeny=%s/%u\n", prefix, strnull(k), iaai->prefixlen);
} }

STRV_FOREACH(path, c->ip_filters_ingress)
fprintf(f, "%sIPIngressFilterPath=%s\n", prefix, *path);

STRV_FOREACH(path, c->ip_filters_egress)
fprintf(f, "%sIPEgressFilterPath=%s\n", prefix, *path);
} }


int cgroup_add_device_allow(CGroupContext *c, const char *dev, const char *mode) { int cgroup_add_device_allow(CGroupContext *c, const char *dev, const char *mode) {
Expand Down Expand Up @@ -945,6 +955,7 @@ static void cgroup_apply_firewall(Unit *u) {
if (bpf_firewall_compile(u) < 0) if (bpf_firewall_compile(u) < 0)
return; return;


(void) bpf_firewall_load_custom(u);
(void) bpf_firewall_install(u); (void) bpf_firewall_install(u);
} }


Expand Down Expand Up @@ -1353,7 +1364,9 @@ static bool unit_get_needs_bpf_firewall(Unit *u) {


if (c->ip_accounting || if (c->ip_accounting ||
c->ip_address_allow || c->ip_address_allow ||
c->ip_address_deny) c->ip_address_deny ||
c->ip_filters_ingress ||
c->ip_filters_egress)
return true; return true;


/* If any parent slice has an IP access list defined, it applies too */ /* If any parent slice has an IP access list defined, it applies too */
Expand Down Expand Up @@ -1919,6 +1932,12 @@ int unit_attach_pids_to_cgroup(Unit *u, Set *pids, const char *suffix_path) {
if (set_isempty(pids)) if (set_isempty(pids))
return 0; return 0;


/* Load any custom firewall BPF programs here once to test if they are existing and actually loadable.
* Fail here early since later errors in the call chain unit_realize_cgroup to cgroup_context_apply are ignored. */
r = bpf_firewall_load_custom(u);
if (r < 0)
return r;

r = unit_realize_cgroup(u); r = unit_realize_cgroup(u);
if (r < 0) if (r < 0)
return r; return r;
Expand Down
3 changes: 3 additions & 0 deletions src/core/cgroup.h
Expand Up @@ -114,6 +114,9 @@ struct CGroupContext {
LIST_HEAD(IPAddressAccessItem, ip_address_allow); LIST_HEAD(IPAddressAccessItem, ip_address_allow);
LIST_HEAD(IPAddressAccessItem, ip_address_deny); LIST_HEAD(IPAddressAccessItem, ip_address_deny);


char **ip_filters_ingress;
char **ip_filters_egress;

/* For legacy hierarchies */ /* For legacy hierarchies */
uint64_t cpu_shares; uint64_t cpu_shares;
uint64_t startup_cpu_shares; uint64_t startup_cpu_shares;
Expand Down

0 comments on commit fab3474

Please sign in to comment.