Skip to content

Commit

Permalink
Merge branch 'bpf: Add socket destroy capability'
Browse files Browse the repository at this point in the history
Aditi Ghag says:

====================

This patch set adds the capability to destroy sockets in BPF. We plan to
use the capability in Cilium to force client sockets to reconnect when
their remote load-balancing backends are deleted. The other use case is
on-the-fly policy enforcement where existing socket connections
prevented by policies need to be terminated.

The use cases, and more details around
the selected approach were presented at LPC 2022 -
https://lpc.events/event/16/contributions/1358/.
RFC discussion -
https://lore.kernel.org/netdev/CABG=zsBEh-P4NXk23eBJw7eajB5YJeRS7oPXnTAzs=yob4EMoQ@mail.gmail.com/T/#u.
v8 patch series -
https://lore.kernel.org/bpf/20230517175359.527917-1-aditi.ghag@isovalent.com/

v9 highlights:
Address review comments:
Martin:
- Rearranged the kfunc filter patch, and added the missing break
  statement.
- Squashed the extended selftest/bpf patch.
Yonghong:
- Revised commit message for patch 1.

(Below notes are same as v8 patch series that are still relevant. Refer to
earlier patch series versions for other notes.)
- I hit a snag while writing the kfunc where verifier complained about the
  `sock_common` type passed from TCP iterator. With kfuncs, there don't
  seem to be any options available to pass BTF type hints to the verifier
  (equivalent of `ARG_PTR_TO_BTF_ID_SOCK_COMMON`, as was the case with the
  helper).  As a result, I changed the argument type of the sock_destroy
  kfunc to `sock_common`.
====================

Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
  • Loading branch information
Martin KaFai Lau committed May 20, 2023
2 parents 9343184 + 1a8bc22 commit 18f5588
Show file tree
Hide file tree
Showing 13 changed files with 794 additions and 79 deletions.
18 changes: 11 additions & 7 deletions include/linux/btf.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,10 +98,14 @@ struct btf_type;
union bpf_attr;
struct btf_show;
struct btf_id_set;
struct bpf_prog;

typedef int (*btf_kfunc_filter_t)(const struct bpf_prog *prog, u32 kfunc_id);

/* A registered set of kernel function (kfunc) BTF IDs, optionally guarded
 * by a per-program filter callback.
 */
struct btf_kfunc_id_set {
	struct module *owner;		/* registering module; THIS_MODULE or NULL for vmlinux */
	struct btf_id_set8 *set;	/* BTF ID + flags pairs */
	btf_kfunc_filter_t filter;	/* optional; a non-zero return vetoes the kfunc for that prog */
};

struct btf_id_dtor_kfunc {
Expand Down Expand Up @@ -479,18 +483,17 @@ static inline void *btf_id_set8_contains(const struct btf_id_set8 *set, u32 id)
return bsearch(&id, set->pairs, set->cnt, sizeof(set->pairs[0]), btf_id_cmp_func);
}

struct bpf_prog;
struct bpf_verifier_log;

#ifdef CONFIG_BPF_SYSCALL
const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id);
const char *btf_name_by_offset(const struct btf *btf, u32 offset);
struct btf *btf_parse_vmlinux(void);
struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog);
u32 *btf_kfunc_id_set_contains(const struct btf *btf,
enum bpf_prog_type prog_type,
u32 kfunc_btf_id);
u32 *btf_kfunc_is_modify_return(const struct btf *btf, u32 kfunc_btf_id);
u32 *btf_kfunc_id_set_contains(const struct btf *btf, u32 kfunc_btf_id,
const struct bpf_prog *prog);
u32 *btf_kfunc_is_modify_return(const struct btf *btf, u32 kfunc_btf_id,
const struct bpf_prog *prog);
int register_btf_kfunc_id_set(enum bpf_prog_type prog_type,
const struct btf_kfunc_id_set *s);
int register_btf_fmodret_id_set(const struct btf_kfunc_id_set *kset);
Expand All @@ -517,8 +520,9 @@ static inline const char *btf_name_by_offset(const struct btf *btf,
return NULL;
}
/* !CONFIG_BPF_SYSCALL stub: no kfunc sets can be registered, so the lookup
 * always fails.  @prog is const-qualified to match the CONFIG_BPF_SYSCALL
 * prototype above (the original stub dropped the const).
 */
static inline u32 *btf_kfunc_id_set_contains(const struct btf *btf,
					     u32 kfunc_btf_id,
					     const struct bpf_prog *prog)
{
	return NULL;
}
Expand Down
1 change: 0 additions & 1 deletion include/net/udp.h
Original file line number Diff line number Diff line change
Expand Up @@ -437,7 +437,6 @@ struct udp_seq_afinfo {
/* Iteration state for walking the UDP socket hash table (seq_file based). */
struct udp_iter_state {
	struct seq_net_private p;	/* per-net seq_file private data */
	int bucket;			/* current hash bucket being walked */
};

void *udp_seq_start(struct seq_file *seq, loff_t *pos);
Expand Down
65 changes: 54 additions & 11 deletions kernel/bpf/btf.c
Original file line number Diff line number Diff line change
Expand Up @@ -222,10 +222,17 @@ enum btf_kfunc_hook {
enum {
BTF_KFUNC_SET_MAX_CNT = 256,
BTF_DTOR_KFUNC_MAX_CNT = 256,
BTF_KFUNC_FILTER_MAX_CNT = 16,
};

/* Filter callbacks registered for one kfunc hook. */
struct btf_kfunc_hook_filter {
	btf_kfunc_filter_t filters[BTF_KFUNC_FILTER_MAX_CNT];
	u32 nr_filters;			/* number of valid entries in filters[] */
};

/* Per-BTF table of registered kfunc ID sets and filters, indexed by hook. */
struct btf_kfunc_set_tab {
	struct btf_id_set8 *sets[BTF_KFUNC_HOOK_MAX];
	struct btf_kfunc_hook_filter hook_filters[BTF_KFUNC_HOOK_MAX];
};

struct btf_id_dtor_kfunc_tab {
Expand Down Expand Up @@ -7669,9 +7676,12 @@ static int btf_check_kfunc_protos(struct btf *btf, u32 func_id, u32 func_flags)
/* Kernel Function (kfunc) BTF ID set registration API */

static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook,
struct btf_id_set8 *add_set)
const struct btf_kfunc_id_set *kset)
{
struct btf_kfunc_hook_filter *hook_filter;
struct btf_id_set8 *add_set = kset->set;
bool vmlinux_set = !btf_is_module(btf);
bool add_filter = !!kset->filter;
struct btf_kfunc_set_tab *tab;
struct btf_id_set8 *set;
u32 set_cnt;
Expand All @@ -7686,6 +7696,24 @@ static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook,
return 0;

tab = btf->kfunc_set_tab;

if (tab && add_filter) {
u32 i;

hook_filter = &tab->hook_filters[hook];
for (i = 0; i < hook_filter->nr_filters; i++) {
if (hook_filter->filters[i] == kset->filter) {
add_filter = false;
break;
}
}

if (add_filter && hook_filter->nr_filters == BTF_KFUNC_FILTER_MAX_CNT) {
ret = -E2BIG;
goto end;
}
}

if (!tab) {
tab = kzalloc(sizeof(*tab), GFP_KERNEL | __GFP_NOWARN);
if (!tab)
Expand All @@ -7708,7 +7736,7 @@ static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook,
*/
if (!vmlinux_set) {
tab->sets[hook] = add_set;
return 0;
goto do_add_filter;
}

/* In case of vmlinux sets, there may be more than one set being
Expand Down Expand Up @@ -7750,6 +7778,11 @@ static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook,

sort(set->pairs, set->cnt, sizeof(set->pairs[0]), btf_id_cmp_func, NULL);

do_add_filter:
if (add_filter) {
hook_filter = &tab->hook_filters[hook];
hook_filter->filters[hook_filter->nr_filters++] = kset->filter;
}
return 0;
end:
btf_free_kfunc_set_tab(btf);
Expand All @@ -7758,15 +7791,22 @@ static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook,

static u32 *__btf_kfunc_id_set_contains(const struct btf *btf,
enum btf_kfunc_hook hook,
u32 kfunc_btf_id)
u32 kfunc_btf_id,
const struct bpf_prog *prog)
{
struct btf_kfunc_hook_filter *hook_filter;
struct btf_id_set8 *set;
u32 *id;
u32 *id, i;

if (hook >= BTF_KFUNC_HOOK_MAX)
return NULL;
if (!btf->kfunc_set_tab)
return NULL;
hook_filter = &btf->kfunc_set_tab->hook_filters[hook];
for (i = 0; i < hook_filter->nr_filters; i++) {
if (hook_filter->filters[i](prog, kfunc_btf_id))
return NULL;
}
set = btf->kfunc_set_tab->sets[hook];
if (!set)
return NULL;
Expand Down Expand Up @@ -7821,23 +7861,25 @@ static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type)
* protection for looking up a well-formed btf->kfunc_set_tab.
*/
/* Look up @kfunc_btf_id in the kfunc sets that apply to @prog.  The COMMON
 * hook (kfuncs available to every program type) is consulted first, then the
 * hook derived from the program's resolved type.
 *
 * Returns a pointer to the kfunc's flags if the kfunc is allowed for @prog,
 * NULL otherwise.
 */
u32 *btf_kfunc_id_set_contains(const struct btf *btf,
			       u32 kfunc_btf_id,
			       const struct bpf_prog *prog)
{
	enum btf_kfunc_hook hook;
	u32 *flags;

	flags = __btf_kfunc_id_set_contains(btf, BTF_KFUNC_HOOK_COMMON,
					    kfunc_btf_id, prog);
	if (!flags) {
		hook = bpf_prog_type_to_kfunc_hook(resolve_prog_type(prog));
		flags = __btf_kfunc_id_set_contains(btf, hook, kfunc_btf_id,
						    prog);
	}
	return flags;
}

u32 *btf_kfunc_is_modify_return(const struct btf *btf, u32 kfunc_btf_id)
/* Check whether @kfunc_btf_id is registered in the fmodret (modify-return)
 * hook for @prog.  Returns the kfunc's flags if so, NULL otherwise.
 */
u32 *btf_kfunc_is_modify_return(const struct btf *btf, u32 kfunc_btf_id,
				const struct bpf_prog *prog)
{
	return __btf_kfunc_id_set_contains(btf, BTF_KFUNC_HOOK_FMODRET, kfunc_btf_id, prog);
}

static int __register_btf_kfunc_id_set(enum btf_kfunc_hook hook,
Expand Down Expand Up @@ -7868,7 +7910,8 @@ static int __register_btf_kfunc_id_set(enum btf_kfunc_hook hook,
goto err_out;
}

ret = btf_populate_kfunc_set(btf, hook, kset->set);
ret = btf_populate_kfunc_set(btf, hook, kset);

err_out:
btf_put(btf);
return ret;
Expand Down
7 changes: 4 additions & 3 deletions kernel/bpf/verifier.c
Original file line number Diff line number Diff line change
Expand Up @@ -10939,7 +10939,7 @@ static int fetch_kfunc_meta(struct bpf_verifier_env *env,
*kfunc_name = func_name;
func_proto = btf_type_by_id(desc_btf, func->type);

kfunc_flags = btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog), func_id);
kfunc_flags = btf_kfunc_id_set_contains(desc_btf, func_id, env->prog);
if (!kfunc_flags) {
return -EACCES;
}
Expand Down Expand Up @@ -19010,7 +19010,8 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
* in the fmodret id set with the KF_SLEEPABLE flag.
*/
else {
u32 *flags = btf_kfunc_is_modify_return(btf, btf_id);
u32 *flags = btf_kfunc_is_modify_return(btf, btf_id,
prog);

if (flags && (*flags & KF_SLEEPABLE))
ret = 0;
Expand Down Expand Up @@ -19038,7 +19039,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
return -EINVAL;
}
ret = -EINVAL;
if (btf_kfunc_is_modify_return(btf, btf_id) ||
if (btf_kfunc_is_modify_return(btf, btf_id, prog) ||
!check_attach_modify_return(addr, tname))
ret = 0;
if (ret) {
Expand Down
63 changes: 63 additions & 0 deletions net/core/filter.c
Original file line number Diff line number Diff line change
Expand Up @@ -11723,3 +11723,66 @@ static int __init bpf_kfunc_init(void)
return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &bpf_kfunc_set_xdp);
}
late_initcall(bpf_kfunc_init);

/* Disables missing prototype warnings */
__diag_push();
__diag_ignore_all("-Wmissing-prototypes",
"Global functions as their definitions will be in vmlinux BTF");

/* bpf_sock_destroy: Destroy the given socket with ECONNABORTED error code.
 *
 * The function expects a non-NULL pointer to a socket, and invokes the
 * protocol specific socket destroy handlers.
 *
 * The kfunc can only be called from BPF contexts that have acquired the
 * socket locks.
 *
 * Parameters:
 * @sock: Pointer to socket to be destroyed
 *
 * Return:
 * 0 on success.
 * -EOPNOTSUPP if the protocol is not TCP/UDP or provides no destroy handler.
 * Otherwise the error code of the protocol's destroy handler (e.g. -EINVAL).
 */
__bpf_kfunc int bpf_sock_destroy(struct sock_common *sock)
{
	struct sock *sk = (struct sock *)sock;

	/* Synchronous execution of the destroy handler relies on the sock
	 * lock held by the BPF context; only TCP and UDP provide those
	 * locking semantics today.  Other protocols would first need to
	 * acquire the sock lock in the BPF context.
	 */
	if (sk->sk_protocol != IPPROTO_TCP && sk->sk_protocol != IPPROTO_UDP)
		return -EOPNOTSUPP;
	if (!sk->sk_prot->diag_destroy)
		return -EOPNOTSUPP;

	return sk->sk_prot->diag_destroy(sk, ECONNABORTED);
}

__diag_pop()

/* BTF ID set of kfuncs exposed to BPF socket iterator programs. */
BTF_SET8_START(bpf_sk_iter_kfunc_ids)
BTF_ID_FLAGS(func, bpf_sock_destroy, KF_TRUSTED_ARGS)
BTF_SET8_END(bpf_sk_iter_kfunc_ids)

/* Restrict the kfuncs in bpf_sk_iter_kfunc_ids to BPF_TRACE_ITER programs.
 * A non-zero return vetoes the kfunc for @prog.
 */
static int tracing_iter_filter(const struct bpf_prog *prog, u32 kfunc_id)
{
	/* kfuncs outside our set are not our concern. */
	if (!btf_id_set8_contains(&bpf_sk_iter_kfunc_ids, kfunc_id))
		return 0;

	return prog->expected_attach_type == BPF_TRACE_ITER ? 0 : -EACCES;
}

/* Registration record pairing the socket-iterator kfunc set with the filter
 * that limits it to BPF_TRACE_ITER programs.
 */
static const struct btf_kfunc_id_set bpf_sk_iter_kfunc_set = {
	.owner = THIS_MODULE,
	.set = &bpf_sk_iter_kfunc_ids,
	.filter = tracing_iter_filter,
};

/* Register the socket-destroy kfunc set for tracing programs at boot. */
static int init_subsystem(void)
{
	return register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_sk_iter_kfunc_set);
}
late_initcall(init_subsystem);
9 changes: 6 additions & 3 deletions net/ipv4/tcp.c
Original file line number Diff line number Diff line change
Expand Up @@ -4682,8 +4682,10 @@ int tcp_abort(struct sock *sk, int err)
return 0;
}

/* Don't race with userspace socket closes such as tcp_close. */
lock_sock(sk);
/* BPF context ensures sock locking. */
if (!has_current_bpf_ctx())
/* Don't race with userspace socket closes such as tcp_close. */
lock_sock(sk);

if (sk->sk_state == TCP_LISTEN) {
tcp_set_state(sk, TCP_CLOSE);
Expand All @@ -4707,7 +4709,8 @@ int tcp_abort(struct sock *sk, int err)
bh_unlock_sock(sk);
local_bh_enable();
tcp_write_queue_purge(sk);
release_sock(sk);
if (!has_current_bpf_ctx())
release_sock(sk);
return 0;
}
EXPORT_SYMBOL_GPL(tcp_abort);
Expand Down
7 changes: 3 additions & 4 deletions net/ipv4/tcp_ipv4.c
Original file line number Diff line number Diff line change
Expand Up @@ -2962,15 +2962,14 @@ static int bpf_iter_tcp_seq_show(struct seq_file *seq, void *v)
struct bpf_iter_meta meta;
struct bpf_prog *prog;
struct sock *sk = v;
bool slow;
uid_t uid;
int ret;

if (v == SEQ_START_TOKEN)
return 0;

if (sk_fullsock(sk))
slow = lock_sock_fast(sk);
lock_sock(sk);

if (unlikely(sk_unhashed(sk))) {
ret = SEQ_SKIP;
Expand All @@ -2994,7 +2993,7 @@ static int bpf_iter_tcp_seq_show(struct seq_file *seq, void *v)

unlock:
if (sk_fullsock(sk))
unlock_sock_fast(sk, slow);
release_sock(sk);
return ret;

}
Expand Down Expand Up @@ -3356,7 +3355,7 @@ static struct bpf_iter_reg tcp_reg_info = {
.ctx_arg_info_size = 1,
.ctx_arg_info = {
{ offsetof(struct bpf_iter__tcp, sk_common),
PTR_TO_BTF_ID_OR_NULL },
PTR_TO_BTF_ID_OR_NULL | PTR_TRUSTED },
},
.get_func_proto = bpf_iter_tcp_get_func_proto,
.seq_info = &tcp_seq_info,
Expand Down
Loading

0 comments on commit 18f5588

Please sign in to comment.