Skip to content

Commit fab3474

Browse files
pothospoettering
authored andcommitted
bpf-firewall: custom BPF programs through IP(Ingress|Egress)FilterPath=
Takes a single /sys/fs/bpf/pinned_prog string as argument, but may be specified multiple times. An empty assignment resets all previous filters. Closes #10227
1 parent 2d901d3 commit fab3474

16 files changed

+436
-7
lines changed

man/systemd.resource-control.xml

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -618,6 +618,39 @@
618618
</listitem>
619619
</varlistentry>
620620

621+
<varlistentry>
622+
<term><varname>IPIngressFilterPath=<replaceable>BPF_FS_PROGRAM_PATH</replaceable></varname></term>
623+
<term><varname>IPEgressFilterPath=<replaceable>BPF_FS_PROGRAM_PATH</replaceable></varname></term>
624+
625+
<listitem>
626+
<para>Add custom network traffic filters implemented as BPF programs, applying to all IP packets
627+
sent and received over <constant>AF_INET</constant> and <constant>AF_INET6</constant> sockets.
628+
Takes an absolute path to a pinned BPF program in the BPF virtual filesystem (<filename>/sys/fs/bpf/</filename>).
629+
</para>
630+
631+
<para>The filters configured with this option are applied to all sockets created by processes
632+
of this unit (or in the case of socket units, associated with it). The filters are loaded in addition
633+
to filters of any of the parent slice units this unit might be a member of as well as any
634+
<varname>IPAddressAllow=</varname> and <varname>IPAddressDeny=</varname> filters in any of these units.
635+
By default there are no filters specified.</para>
636+
637+
<para>If these settings are used multiple times in the same unit all the specified programs are attached. If an
638+
empty string is assigned to these settings the program list is reset and all previously specified programs are ignored.</para>
639+
640+
<para>Note that for socket-activated services, the IP filter programs configured on the socket unit apply to
641+
all sockets associated with it directly, but not to any sockets created by the ultimately activated services
642+
for it. Conversely, the IP filter programs configured for the service are not applied to any sockets passed into
643+
the service via socket activation. Thus, it is usually a good idea to replicate the IP filter programs on both
644+
the socket and the service unit, however it often makes sense to maintain one configuration more open and the other
645+
one more restricted, depending on the use case.</para>
646+
647+
<para>Note that these settings might not be supported on some systems (for example if eBPF control group
648+
support is not enabled in the underlying kernel or container manager). These settings will fail the service in
649+
that case. If compatibility with such systems is desired it is hence recommended to attach your filter manually
650+
(requires <varname>Delegate=</varname><constant>yes</constant>) instead of using this setting.</para>
651+
</listitem>
652+
</varlistentry>
653+
621654
<varlistentry>
622655
<term><varname>DeviceAllow=</varname></term>
623656

src/analyze/analyze-security.c

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,9 @@ struct security_info {
4545
bool ip_address_allow_localhost;
4646
bool ip_address_allow_other;
4747

48+
bool ip_filters_custom_ingress;
49+
bool ip_filters_custom_egress;
50+
4851
char *keyring_mode;
4952
bool lock_personality;
5053
bool memory_deny_write_execute;
@@ -590,7 +593,10 @@ static int assess_ip_address_allow(
590593
assert(ret_badness);
591594
assert(ret_description);
592595

593-
if (!info->ip_address_deny_all) {
596+
if (info->ip_filters_custom_ingress || info->ip_filters_custom_egress) {
597+
d = strdup("Service defines custom ingress/egress IP filters with BPF programs");
598+
b = 0;
599+
} else if (!info->ip_address_deny_all) {
594600
d = strdup("Service does not define an IP address whitelist");
595601
b = 10;
596602
} else if (info->ip_address_allow_other) {
@@ -1824,6 +1830,33 @@ static int property_read_ip_address_allow(
18241830
return sd_bus_message_exit_container(m);
18251831
}
18261832

1833+
static int property_read_ip_filters(
1834+
sd_bus *bus,
1835+
const char *member,
1836+
sd_bus_message *m,
1837+
sd_bus_error *error,
1838+
void *userdata) {
1839+
1840+
struct security_info *info = userdata;
1841+
_cleanup_(strv_freep) char **l = NULL;
1842+
int r;
1843+
1844+
assert(bus);
1845+
assert(member);
1846+
assert(m);
1847+
1848+
r = sd_bus_message_read_strv(m, &l);
1849+
if (r < 0)
1850+
return r;
1851+
1852+
if (streq(member, "IPIngressFilterPath"))
1853+
info->ip_filters_custom_ingress = !strv_isempty(l);
1854+
else if (streq(member, "IPEgressFilterPath"))
1855+
info->ip_filters_custom_ingress = !strv_isempty(l);
1856+
1857+
return 0;
1858+
}
1859+
18271860
static int property_read_device_allow(
18281861
sd_bus *bus,
18291862
const char *member,
@@ -1873,6 +1906,8 @@ static int acquire_security_info(sd_bus *bus, const char *name, struct security_
18731906
{ "FragmentPath", "s", NULL, offsetof(struct security_info, fragment_path) },
18741907
{ "IPAddressAllow", "a(iayu)", property_read_ip_address_allow, 0 },
18751908
{ "IPAddressDeny", "a(iayu)", property_read_ip_address_allow, 0 },
1909+
{ "IPIngressFilterPath", "as", property_read_ip_filters, 0 },
1910+
{ "IPEgressFilterPath", "as", property_read_ip_filters, 0 },
18761911
{ "Id", "s", NULL, offsetof(struct security_info, id) },
18771912
{ "KeyringMode", "s", NULL, offsetof(struct security_info, keyring_mode) },
18781913
{ "LoadState", "s", NULL, offsetof(struct security_info, load_state) },

src/core/bpf-firewall.c

Lines changed: 104 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -587,6 +587,95 @@ int bpf_firewall_compile(Unit *u) {
587587
return 0;
588588
}
589589

590+
DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(filter_prog_hash_ops, void, trivial_hash_func, trivial_compare_func, BPFProgram, bpf_program_unref);
591+
592+
/* Loads every pinned BPF program listed in 'filter_paths' and stores the resulting BPFProgram objects
 * in '*set'. Any previous contents of '*set' are cleared first. The set is only allocated once there
 * is a program to store, so an empty path list leaves '*set' as it was after clearing.
 *
 * This helper is used for both the ingress and the egress path lists, hence the log messages do not
 * name a direction.
 *
 * Returns 0 on success. On failure the error is logged against the unit and the value returned by
 * log_unit_error_errno() is passed on; programs already added to '*set' stay there. */
static int load_bpf_progs_from_fs_to_set(Unit *u, char **filter_paths, Set **set) {
        char **bpf_fs_path;

        assert(u);
        assert(set);

        set_clear(*set);

        STRV_FOREACH(bpf_fs_path, filter_paths) {
                /* BPFProgram objects are reference counted in this file (bpf_program_ref() /
                 * bpf_program_unref()), so on error paths the object has to be dropped with unref
                 * rather than a plain free(), which would skip whatever teardown unref performs. */
                _cleanup_(bpf_program_unrefp) BPFProgram *prog = NULL;
                int r;

                r = bpf_program_new(BPF_PROG_TYPE_CGROUP_SKB, &prog);
                if (r < 0)
                        return log_unit_error_errno(u, r, "Can't allocate CGROUP SKB BPF program: %m");

                r = bpf_program_load_from_bpf_fs(prog, *bpf_fs_path);
                if (r < 0)
                        return log_unit_error_errno(u, r, "Loading of custom BPF program %s failed: %m", *bpf_fs_path);

                r = set_ensure_allocated(set, &filter_prog_hash_ops);
                if (r < 0)
                        return log_unit_error_errno(u, r, "Can't allocate BPF program set: %m");

                r = set_put(*set, prog);
                if (r < 0)
                        return log_unit_error_errno(u, r, "Can't add program to BPF program set: %m");

                /* The set holds the reference now; disarm the cleanup handler. */
                TAKE_PTR(prog);
        }

        return 0;
}
621+
622+
/* Loads the custom ingress and egress BPF filter programs configured in the unit's cgroup context
 * into u->ip_bpf_custom_ingress and u->ip_bpf_custom_egress.
 *
 * Returns 0 if the unit has no cgroup context, if no custom filters are configured, or if loading
 * succeeded. Returns a negative value if the firewall support check fails, if BPF_F_ALLOW_MULTI is
 * not available, or if loading any program fails. */
int bpf_firewall_load_custom(Unit *u) {
        CGroupContext *cc;
        int supported, r;

        assert(u);

        cc = unit_get_cgroup_context(u);
        if (!cc)
                return 0;

        /* Neither direction has any program configured: nothing to do. */
        if (!cc->ip_filters_ingress && !cc->ip_filters_egress)
                return 0;

        supported = bpf_firewall_supported();
        if (supported < 0)
                return supported;
        if (supported != BPF_FIREWALL_SUPPORTED_WITH_MULTI)
                return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EOPNOTSUPP), "BPF_F_ALLOW_MULTI not supported on this manager, cannot attach custom BPF programs.");

        /* Ingress first, then egress; stop at the first failure. */
        r = load_bpf_progs_from_fs_to_set(u, cc->ip_filters_ingress, &u->ip_bpf_custom_ingress);
        if (r < 0)
                return r;

        r = load_bpf_progs_from_fs_to_set(u, cc->ip_filters_egress, &u->ip_bpf_custom_egress);
        return r < 0 ? r : 0;
}
651+
652+
/* Attaches every program in '*set' to the cgroup at 'path' for the given 'attach_type', using
 * BPF_F_ALLOW_MULTI, and records each attached program in '*set_installed' with an extra reference.
 * Any previous contents of '*set_installed' are cleared first.
 *
 * This helper is called for both the ingress and the egress attach type, hence the log messages do
 * not name a direction.
 *
 * Returns 0 on success. On failure the error is logged against the unit and the value returned by
 * log_unit_error_errno() is passed on; programs recorded before the failure stay in '*set_installed'. */
static int attach_custom_bpf_progs(Unit *u, const char *path, int attach_type, Set **set, Set **set_installed) {
        BPFProgram *prog;
        Iterator i;
        int r;

        assert(u);
        assert(set);
        assert(set_installed);

        set_clear(*set_installed);

        SET_FOREACH(prog, *set, i) {
                r = bpf_program_cgroup_attach(prog, attach_type, path, BPF_F_ALLOW_MULTI);
                if (r < 0)
                        return log_unit_error_errno(u, r, "Attaching custom BPF program to cgroup %s failed: %m", path);

                /* Remember that these BPF programs are installed now. */
                r = set_ensure_allocated(set_installed, &filter_prog_hash_ops);
                if (r < 0)
                        return log_unit_error_errno(u, r, "Can't allocate BPF program set: %m");

                r = set_put(*set_installed, prog);
                if (r < 0)
                        return log_unit_error_errno(u, r, "Can't add program to BPF program set: %m");

                /* The installed set now refers to the program too, so take a reference for it. */
                bpf_program_ref(prog);
        }

        return 0;
}
678+
590679
int bpf_firewall_install(Unit *u) {
591680
_cleanup_free_ char *path = NULL;
592681
CGroupContext *cc;
@@ -614,6 +703,9 @@ int bpf_firewall_install(Unit *u) {
614703
log_unit_debug(u, "BPF_F_ALLOW_MULTI is not supported on this manager, not doing BPF firewall on slice units.");
615704
return -EOPNOTSUPP;
616705
}
706+
if (supported != BPF_FIREWALL_SUPPORTED_WITH_MULTI &&
707+
(!set_isempty(u->ip_bpf_custom_ingress) || !set_isempty(u->ip_bpf_custom_egress)))
708+
return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EOPNOTSUPP), "BPF_F_ALLOW_MULTI not supported on this manager, cannot attach custom BPF programs.");
617709

618710
r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, NULL, &path);
619711
if (r < 0)
@@ -628,7 +720,8 @@ int bpf_firewall_install(Unit *u) {
628720
u->ip_bpf_ingress_installed = bpf_program_unref(u->ip_bpf_ingress_installed);
629721

630722
if (u->ip_bpf_egress) {
631-
r = bpf_program_cgroup_attach(u->ip_bpf_egress, BPF_CGROUP_INET_EGRESS, path, flags);
723+
r = bpf_program_cgroup_attach(u->ip_bpf_egress, BPF_CGROUP_INET_EGRESS, path,
724+
flags | (set_isempty(u->ip_bpf_custom_egress) ? 0 : BPF_F_ALLOW_MULTI));
632725
if (r < 0)
633726
return log_unit_error_errno(u, r, "Attaching egress BPF program to cgroup %s failed: %m", path);
634727

@@ -637,13 +730,22 @@ int bpf_firewall_install(Unit *u) {
637730
}
638731

639732
if (u->ip_bpf_ingress) {
640-
r = bpf_program_cgroup_attach(u->ip_bpf_ingress, BPF_CGROUP_INET_INGRESS, path, flags);
733+
r = bpf_program_cgroup_attach(u->ip_bpf_ingress, BPF_CGROUP_INET_INGRESS, path,
734+
flags | (set_isempty(u->ip_bpf_custom_ingress) ? 0 : BPF_F_ALLOW_MULTI));
641735
if (r < 0)
642736
return log_unit_error_errno(u, r, "Attaching ingress BPF program to cgroup %s failed: %m", path);
643737

644738
u->ip_bpf_ingress_installed = bpf_program_ref(u->ip_bpf_ingress);
645739
}
646740

741+
r = attach_custom_bpf_progs(u, path, BPF_CGROUP_INET_EGRESS, &u->ip_bpf_custom_egress, &u->ip_bpf_custom_egress_installed);
742+
if (r < 0)
743+
return r;
744+
745+
r = attach_custom_bpf_progs(u, path, BPF_CGROUP_INET_INGRESS, &u->ip_bpf_custom_ingress, &u->ip_bpf_custom_ingress_installed);
746+
if (r < 0)
747+
return r;
748+
647749
return 0;
648750
}
649751

src/core/bpf-firewall.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ int bpf_firewall_supported(void);
1515

1616
int bpf_firewall_compile(Unit *u);
1717
int bpf_firewall_install(Unit *u);
18+
int bpf_firewall_load_custom(Unit *u);
1819

1920
int bpf_firewall_read_accounting(int map_fd, uint64_t *ret_bytes, uint64_t *ret_packets);
2021
int bpf_firewall_reset_accounting(int map_fd);

src/core/cgroup.c

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,9 @@ void cgroup_context_done(CGroupContext *c) {
199199

200200
c->ip_address_allow = ip_address_access_free_all(c->ip_address_allow);
201201
c->ip_address_deny = ip_address_access_free_all(c->ip_address_deny);
202+
203+
c->ip_filters_ingress = strv_free(c->ip_filters_ingress);
204+
c->ip_filters_egress = strv_free(c->ip_filters_egress);
202205
}
203206

204207
void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
@@ -210,6 +213,7 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
210213
CGroupBlockIODeviceWeight *w;
211214
CGroupDeviceAllow *a;
212215
IPAddressAccessItem *iaai;
216+
char **path;
213217
char u[FORMAT_TIMESPAN_MAX];
214218
char v[FORMAT_TIMESPAN_MAX];
215219

@@ -360,6 +364,12 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
360364
(void) in_addr_to_string(iaai->family, &iaai->address, &k);
361365
fprintf(f, "%sIPAddressDeny=%s/%u\n", prefix, strnull(k), iaai->prefixlen);
362366
}
367+
368+
STRV_FOREACH(path, c->ip_filters_ingress)
369+
fprintf(f, "%sIPIngressFilterPath=%s\n", prefix, *path);
370+
371+
STRV_FOREACH(path, c->ip_filters_egress)
372+
fprintf(f, "%sIPEgressFilterPath=%s\n", prefix, *path);
363373
}
364374

365375
int cgroup_add_device_allow(CGroupContext *c, const char *dev, const char *mode) {
@@ -945,6 +955,7 @@ static void cgroup_apply_firewall(Unit *u) {
945955
if (bpf_firewall_compile(u) < 0)
946956
return;
947957

958+
(void) bpf_firewall_load_custom(u);
948959
(void) bpf_firewall_install(u);
949960
}
950961

@@ -1353,7 +1364,9 @@ static bool unit_get_needs_bpf_firewall(Unit *u) {
13531364

13541365
if (c->ip_accounting ||
13551366
c->ip_address_allow ||
1356-
c->ip_address_deny)
1367+
c->ip_address_deny ||
1368+
c->ip_filters_ingress ||
1369+
c->ip_filters_egress)
13571370
return true;
13581371

13591372
/* If any parent slice has an IP access list defined, it applies too */
@@ -1919,6 +1932,12 @@ int unit_attach_pids_to_cgroup(Unit *u, Set *pids, const char *suffix_path) {
19191932
if (set_isempty(pids))
19201933
return 0;
19211934

1935+
/* Load any custom firewall BPF programs here once to test if they are existing and actually loadable.
1936+
* Fail here early since later errors in the call chain unit_realize_cgroup to cgroup_context_apply are ignored. */
1937+
r = bpf_firewall_load_custom(u);
1938+
if (r < 0)
1939+
return r;
1940+
19221941
r = unit_realize_cgroup(u);
19231942
if (r < 0)
19241943
return r;

src/core/cgroup.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,9 @@ struct CGroupContext {
114114
LIST_HEAD(IPAddressAccessItem, ip_address_allow);
115115
LIST_HEAD(IPAddressAccessItem, ip_address_deny);
116116

117+
char **ip_filters_ingress;
118+
char **ip_filters_egress;
119+
117120
/* For legacy hierarchies */
118121
uint64_t cpu_shares;
119122
uint64_t startup_cpu_shares;

0 commit comments

Comments
 (0)