Skip to content

Commit

Permalink
bpf: Add bpf_sock_destroy kfunc
Browse files Browse the repository at this point in the history
The socket destroy kfunc is used to forcefully terminate sockets from
certain BPF contexts. We plan to use the capability in Cilium
load-balancing to terminate client sockets that continue to connect to
deleted backends.  The other use case is on-the-fly policy enforcement
where existing socket connections prevented by policies need to be
forcefully terminated.  The kfunc also allows terminating sockets that may
or may not be actively sending traffic.

The kfunc can currently be called only from BPF TCP and UDP iterators,
where users can filter and terminate selected sockets. More
specifically, it can only be called from BPF contexts that ensure
socket locking in order to allow synchronous execution of protocol
specific `diag_destroy` handlers. The previous commit that batches UDP
sockets during iteration facilitated a synchronous invocation of the UDP
destroy callback from BPF context by skipping socket locks in
`udp_abort`. TCP iterator already supported batching of sockets being
iterated. To that end, `tracing_iter_filter` callback filter is added so
that verifier can restrict the kfunc to programs with `BPF_TRACE_ITER`
attach type, and reject other programs.

The kfunc takes a `sock_common` type argument, even though it expects, and
casts it to, a `sock` pointer. This enables the verifier to allow the
sock_destroy kfunc to be called for TCP with `sock_common` and UDP with
`sock` structs. Furthermore, as `sock_common` only has a subset of
certain fields of `sock`, casting pointer to the latter type might not
always be safe for certain sockets like request sockets, but these have a
special handling in the diag_destroy handlers.

Additionally, the kfunc is defined with the `KF_TRUSTED_ARGS` flag to avoid
cases where a `PTR_TO_BTF_ID` sk is obtained by following another pointer,
e.g. getting an sk pointer (possibly even NULL) by following another sk
pointer. The socket pointer argument passed in TCP and UDP iterators is
tagged as `PTR_TRUSTED` in {tcp,udp}_reg_info.  The TRUSTED arg changes
are contributed by Martin KaFai Lau <martin.lau@kernel.org>.

Signed-off-by: Aditi Ghag <aditi.ghag@isovalent.com>
Link: https://lore.kernel.org/r/20230519225157.760788-8-aditi.ghag@isovalent.com
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
  • Loading branch information
aditighag authored and Martin KaFai Lau committed May 20, 2023
1 parent e924e80 commit 4ddbcb8
Show file tree
Hide file tree
Showing 4 changed files with 75 additions and 7 deletions.
63 changes: 63 additions & 0 deletions net/core/filter.c
Original file line number Diff line number Diff line change
Expand Up @@ -11723,3 +11723,66 @@ static int __init bpf_kfunc_init(void)
return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &bpf_kfunc_set_xdp);
}
late_initcall(bpf_kfunc_init);

/* Disables missing prototype warnings */
__diag_push();
__diag_ignore_all("-Wmissing-prototypes",
		  "Global functions as their definitions will be in vmlinux BTF");

/* bpf_sock_destroy: Destroy the given socket with ECONNABORTED error code.
 *
 * The function expects a non-NULL pointer to a socket, and invokes the
 * protocol specific socket destroy handlers.
 *
 * The helper can only be called from BPF contexts that have acquired the socket
 * locks.
 *
 * Parameters:
 * @sock: Pointer to socket to be destroyed
 *
 * Return:
 * -EOPNOTSUPP if the socket's protocol has no diag_destroy handler, or if the
 * protocol is neither TCP nor UDP.
 * Otherwise, the value returned by the protocol specific diag_destroy handler.
 */
__bpf_kfunc int bpf_sock_destroy(struct sock_common *sock)
{
	/* The argument is typed as sock_common, but the destroy handlers
	 * operate on a struct sock, hence the cast.
	 */
	struct sock *sk = (struct sock *)sock;

	/* The locking semantics that allow for synchronous execution of the
	 * destroy handlers are only supported for TCP and UDP.
	 * Supporting protocols will need to acquire sock lock in the BPF context
	 * prior to invoking this kfunc.
	 */
	if (!sk->sk_prot->diag_destroy || (sk->sk_protocol != IPPROTO_TCP &&
					   sk->sk_protocol != IPPROTO_UDP))
		return -EOPNOTSUPP;

	return sk->sk_prot->diag_destroy(sk, ECONNABORTED);
}

__diag_pop();

/* BTF ID set holding the kfuncs that tracing_iter_filter() restricts to
 * BPF_TRACE_ITER programs. bpf_sock_destroy is registered with
 * KF_TRUSTED_ARGS.
 */
BTF_SET8_START(bpf_sk_iter_kfunc_ids)
BTF_ID_FLAGS(func, bpf_sock_destroy, KF_TRUSTED_ARGS)
BTF_SET8_END(bpf_sk_iter_kfunc_ids)

/* Kfunc filter for the socket iterator kfunc set.
 *
 * Returns -EACCES when @kfunc_id belongs to bpf_sk_iter_kfunc_ids and @prog
 * does not have the BPF_TRACE_ITER expected attach type; returns 0 in every
 * other case.
 */
static int tracing_iter_filter(const struct bpf_prog *prog, u32 kfunc_id)
{
	/* Kfuncs outside the socket iterator set are not restricted here. */
	if (!btf_id_set8_contains(&bpf_sk_iter_kfunc_ids, kfunc_id))
		return 0;

	/* Members of the set are allowed for iterator programs only. */
	if (prog->expected_attach_type == BPF_TRACE_ITER)
		return 0;

	return -EACCES;
}

/* Kfunc ID set descriptor passed to register_btf_kfunc_id_set(): ties the
 * bpf_sk_iter_kfunc_ids BTF set to the tracing_iter_filter() callback.
 */
static const struct btf_kfunc_id_set bpf_sk_iter_kfunc_set = {
.owner = THIS_MODULE,
/* The kfuncs covered by this registration. */
.set = &bpf_sk_iter_kfunc_ids,
/* Rejects (with -EACCES) use of these kfuncs outside BPF_TRACE_ITER programs. */
.filter = tracing_iter_filter,
};

static int init_subsystem(void)
{
return register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_sk_iter_kfunc_set);
}
late_initcall(init_subsystem);
9 changes: 6 additions & 3 deletions net/ipv4/tcp.c
Original file line number Diff line number Diff line change
Expand Up @@ -4682,8 +4682,10 @@ int tcp_abort(struct sock *sk, int err)
return 0;
}

/* Don't race with userspace socket closes such as tcp_close. */
lock_sock(sk);
/* BPF context ensures sock locking. */
if (!has_current_bpf_ctx())
/* Don't race with userspace socket closes such as tcp_close. */
lock_sock(sk);

if (sk->sk_state == TCP_LISTEN) {
tcp_set_state(sk, TCP_CLOSE);
Expand All @@ -4707,7 +4709,8 @@ int tcp_abort(struct sock *sk, int err)
bh_unlock_sock(sk);
local_bh_enable();
tcp_write_queue_purge(sk);
release_sock(sk);
if (!has_current_bpf_ctx())
release_sock(sk);
return 0;
}
EXPORT_SYMBOL_GPL(tcp_abort);
Expand Down
2 changes: 1 addition & 1 deletion net/ipv4/tcp_ipv4.c
Original file line number Diff line number Diff line change
Expand Up @@ -3355,7 +3355,7 @@ static struct bpf_iter_reg tcp_reg_info = {
.ctx_arg_info_size = 1,
.ctx_arg_info = {
{ offsetof(struct bpf_iter__tcp, sk_common),
PTR_TO_BTF_ID_OR_NULL },
PTR_TO_BTF_ID_OR_NULL | PTR_TRUSTED },
},
.get_func_proto = bpf_iter_tcp_get_func_proto,
.seq_info = &tcp_seq_info,
Expand Down
8 changes: 5 additions & 3 deletions net/ipv4/udp.c
Original file line number Diff line number Diff line change
Expand Up @@ -2930,7 +2930,8 @@ EXPORT_SYMBOL(udp_poll);

int udp_abort(struct sock *sk, int err)
{
lock_sock(sk);
if (!has_current_bpf_ctx())
lock_sock(sk);

/* udp{v6}_destroy_sock() sets it under the sk lock, avoid racing
* with close()
Expand All @@ -2943,7 +2944,8 @@ int udp_abort(struct sock *sk, int err)
__udp_disconnect(sk, 0);

out:
release_sock(sk);
if (!has_current_bpf_ctx())
release_sock(sk);

return 0;
}
Expand Down Expand Up @@ -3646,7 +3648,7 @@ static struct bpf_iter_reg udp_reg_info = {
.ctx_arg_info_size = 1,
.ctx_arg_info = {
{ offsetof(struct bpf_iter__udp, udp_sk),
PTR_TO_BTF_ID_OR_NULL },
PTR_TO_BTF_ID_OR_NULL | PTR_TRUSTED },
},
.seq_info = &udp_seq_info,
};
Expand Down

0 comments on commit 4ddbcb8

Please sign in to comment.