Merge branch 'bpf-x64-fix-tailcall-infinite-loop'
Leon Hwang says:

====================
bpf, x64: Fix tailcall infinite loop

This patch series fixes a tailcall infinite loop on x64.

Since commit ebf7d1f ("bpf, x64: rework pro/epilogue and tailcall
handling in JIT"), tailcalls on x64 have worked better than before.

Since commit e411901 ("bpf: allow for tailcalls in BPF subprograms
for x64 JIT"), tailcalls have been able to run in BPF subprograms on x64.

Since commit 5b92a28 ("bpf: Support attaching tracing BPF program
to other BPF programs"), a BPF program has been able to trace other BPF
programs.

What happens when all three are combined?

1. FENTRY/FEXIT on a BPF subprogram.
2. A tailcall runs in the BPF subprogram.
3. The tailcall calls the subprogram's caller.

As a result, a tailcall infinite loop comes up, and the loop halts the
machine.
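
Here is a minimal sketch of that setup in BPF C (the names and the
single-file layout are illustrative, not taken from this series; in
practice the FENTRY program lives in its own object and is pointed at
the subprogram with bpf_program__set_attach_target() before loading):

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

struct {
        __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
        __uint(max_entries, 1);
        __uint(key_size, sizeof(__u32));
        __uint(value_size, sizeof(__u32));
} jmp_table SEC(".maps");

static __noinline int subprog_tail(struct __sk_buff *skb)
{
        /* 2. A tailcall runs in the BPF subprogram. Userspace stores
         * entry() in slot 0 of jmp_table, so ...
         */
        bpf_tail_call_static(skb, &jmp_table, 0);
        return 0;
}

SEC("tc")
int entry(struct __sk_buff *skb)
{
        /* 3. ... the tailcall jumps back to the subprogram's caller. */
        return subprog_tail(skb);
}

/* 1. FENTRY on the BPF subprogram. Before this fix, the trampoline
 * inserted at subprog_tail() clobbered the tail_call_cnt held in rax,
 * so the tail call limit never fired and the chain looped forever.
 */
SEC("fentry/subprog_tail")
int BPF_PROG(fentry_subprog)
{
        return 0;
}

char _license[] SEC("license") = "GPL";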

In a tail call context, the tail_call_cnt is propagated between BPF
subprograms via the stack and the rax register. The same has to happen in
trampolines.
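
As a toy model (plain userspace C, not kernel code; MAX_TAIL_CALL_CNT
matches the kernel constant, everything else is illustrative), losing
that single counter at one boundary is exactly what turns a bounded
chain into an infinite one:

#include <stdbool.h>
#include <stdio.h>

#define MAX_TAIL_CALL_CNT 33    /* kernel's per-chain tail call limit */

/* One counter per tail call chain; the x64 JIT keeps it in rax and
 * spills it to the stack across bpf2bpf calls.
 */
static bool tail_call(int *tail_call_cnt)
{
        if (*tail_call_cnt >= MAX_TAIL_CALL_CNT)
                return false;   /* tail call refused; the chain ends */
        (*tail_call_cnt)++;
        return true;
}

int main(void)
{
        int tail_call_cnt = 0;  /* like xor eax, eax at the chain entry */
        int rounds = 0;

        /* Counter propagated correctly: the chain ends after 33 rounds. */
        while (tail_call(&tail_call_cnt))
                rounds++;
        printf("with propagation: %d rounds\n", rounds);

        /* Counter lost at a trampoline boundary: every round looks like
         * a fresh chain and the limit never fires; capped here only for
         * demonstration.
         */
        rounds = 0;
        while (rounds < 1000000) {
                int fresh_cnt = 0;      /* tail_call_cnt re-initialised */
                if (!tail_call(&fresh_cnt))
                        break;
                rounds++;
        }
        printf("without propagation: %d rounds (would never stop)\n", rounds);
        return 0;
}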

How did I discover the bug?

Since commit 7f6e431 ("bpf: Limit caller's stack depth 256 for
subprogs with tailcalls"), the total stack size is limited to around 8KiB
(a chain of up to 33 tail calls times the 256-byte per-program stack
limit). So I wrote some bpf progs to validate the stack consumption:
tailcalls running in bpf2bpf, with FENTRY/FEXIT tracing on bpf2bpf.

At that time, I accidentally created a tailcall loop, and the loop halted
my VM. Without the loop, the bpf progs consumed over 8KiB of stack, but
that stack overflow did not halt my VM.

With bpf_printk(), I confirmed that the tail call count limit did not
work as expected. So I read the code and fixed it.

Thanks to Ilya Leoshkevich, this bug has been fixed on s390x.

Hopefully, this bug will be fixed on arm64 in the near future.
====================

Link: https://lore.kernel.org/r/20230912150442.2009-1-hffilwlqm@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Alexei Starovoitov committed Sep 12, 2023
2 parents 96daa98 + e13b5f2 commit 5bbb9e1
Showing 7 changed files with 305 additions and 12 deletions.
32 changes: 26 additions & 6 deletions arch/x86/net/bpf_jit_comp.c
@@ -303,8 +303,12 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf,
 	prog += X86_PATCH_SIZE;
 	if (!ebpf_from_cbpf) {
 		if (tail_call_reachable && !is_subprog)
+			/* When it's the entry of the whole tailcall context,
+			 * zeroing rax means initialising tail_call_cnt.
+			 */
 			EMIT2(0x31, 0xC0); /* xor eax, eax */
 		else
+			/* Keep the same instruction layout. */
 			EMIT2(0x66, 0x90); /* nop2 */
 	}
 	EMIT1(0x55);             /* push rbp */
@@ -1018,6 +1022,10 @@ static void emit_shiftx(u8 **pprog, u32 dst_reg, u8 src_reg, bool is64, u8 op)
 
 #define INSN_SZ_DIFF (((addrs[i] - addrs[i - 1]) - (prog - temp)))
 
+/* mov rax, qword ptr [rbp - rounded_stack_depth - 8] */
+#define RESTORE_TAIL_CALL_CNT(stack) \
+	EMIT3_off32(0x48, 0x8B, 0x85, -round_up(stack, 8) - 8)
+
 static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image,
 		  int oldproglen, struct jit_context *ctx, bool jmp_padding)
 {
@@ -1623,9 +1631,7 @@ st: if (is_imm8(insn->off))
 
 			func = (u8 *) __bpf_call_base + imm32;
 			if (tail_call_reachable) {
-				/* mov rax, qword ptr [rbp - rounded_stack_depth - 8] */
-				EMIT3_off32(0x48, 0x8B, 0x85,
-					    -round_up(bpf_prog->aux->stack_depth, 8) - 8);
+				RESTORE_TAIL_CALL_CNT(bpf_prog->aux->stack_depth);
 				if (!imm32)
 					return -EINVAL;
 				offs = 7 + x86_call_depth_emit_accounting(&prog, func);
@@ -2400,6 +2406,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 	 * [ ... ]
 	 * [ stack_arg2 ]
 	 * RBP - arg_stack_off [ stack_arg1 ]
+	 * RSP                 [ tail_call_cnt ] BPF_TRAMP_F_TAIL_CALL_CTX
 	 */
 
 	/* room for return value of orig_call or fentry prog */
@@ -2464,6 +2471,8 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 	else
 		/* sub rsp, stack_size */
 		EMIT4(0x48, 0x83, 0xEC, stack_size);
+	if (flags & BPF_TRAMP_F_TAIL_CALL_CTX)
+		EMIT1(0x50);	/* push rax */
 	/* mov QWORD PTR [rbp - rbx_off], rbx */
 	emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_6, -rbx_off);
 
@@ -2516,9 +2525,15 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 		restore_regs(m, &prog, regs_off);
 		save_args(m, &prog, arg_stack_off, true);
 
+		if (flags & BPF_TRAMP_F_TAIL_CALL_CTX)
+			/* Before calling the original function, restore the
+			 * tail_call_cnt from stack to rax.
+			 */
+			RESTORE_TAIL_CALL_CNT(stack_size);
+
 		if (flags & BPF_TRAMP_F_ORIG_STACK) {
-			emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, 8);
-			EMIT2(0xff, 0xd0); /* call *rax */
+			emit_ldx(&prog, BPF_DW, BPF_REG_6, BPF_REG_FP, 8);
+			EMIT2(0xff, 0xd3); /* call *rbx */
 		} else {
 			/* call original function */
 			if (emit_rsb_call(&prog, orig_call, prog)) {
@@ -2569,7 +2584,12 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 			ret = -EINVAL;
 			goto cleanup;
 		}
-	}
+	} else if (flags & BPF_TRAMP_F_TAIL_CALL_CTX)
+		/* Before running the original function, restore the
+		 * tail_call_cnt from stack to rax.
+		 */
+		RESTORE_TAIL_CALL_CNT(stack_size);
+
 	/* restore return value of orig_call or fentry prog back into RAX */
 	if (save_ret)
 		emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8);
5 changes: 5 additions & 0 deletions include/linux/bpf.h
@@ -1035,6 +1035,11 @@ struct btf_func_model {
  */
 #define BPF_TRAMP_F_SHARE_IPMODIFY	BIT(6)
 
+/* Indicate that current trampoline is in a tail call context. Then, it has to
+ * cache and restore tail_call_cnt to avoid infinite tail call loop.
+ */
+#define BPF_TRAMP_F_TAIL_CALL_CTX	BIT(7)
+
 /* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50
  * bytes on x86.
  */
4 changes: 2 additions & 2 deletions kernel/bpf/trampoline.c
@@ -415,8 +415,8 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mut
 		goto out;
 	}
 
-	/* clear all bits except SHARE_IPMODIFY */
-	tr->flags &= BPF_TRAMP_F_SHARE_IPMODIFY;
+	/* clear all bits except SHARE_IPMODIFY and TAIL_CALL_CTX */
+	tr->flags &= (BPF_TRAMP_F_SHARE_IPMODIFY | BPF_TRAMP_F_TAIL_CALL_CTX);
 
 	if (tlinks[BPF_TRAMP_FEXIT].nr_links ||
 	    tlinks[BPF_TRAMP_MODIFY_RETURN].nr_links) {
3 changes: 3 additions & 0 deletions kernel/bpf/verifier.c
@@ -19774,6 +19774,9 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
 	if (!tr)
 		return -ENOMEM;
 
+	if (tgt_prog && tgt_prog->aux->tail_call_reachable)
+		tr->flags = BPF_TRAMP_F_TAIL_CALL_CTX;
+
 	prog->aux->dst_trampoline = tr;
 	return 0;
 }