Skip to content

Commit

Permalink
bpf: Add verifier support for dynptrs
Browse files Browse the repository at this point in the history
This patch adds the bulk of the verifier work for supporting dynamic
pointers (dynptrs) in bpf.

A bpf_dynptr is opaque to the bpf program. It is a 16-byte structure
defined internally as:

struct bpf_dynptr_kern {
    void *data;
    u32 size;
    u32 offset;
} __aligned(8);

The upper 8 bits of *size* are reserved (they contain extra metadata about
read-only status and dynptr type). Consequently, a dynptr only supports
memory less than 16 MB.

There are different types of dynptrs (eg malloc, ringbuf, ...). In this
patchset, the most basic one, dynptrs to a bpf program's local memory,
is added. For now only local memory that is of reg type PTR_TO_MAP_VALUE
is supported.

In the verifier, dynptr state information will be tracked in stack
slots. When the program passes in an uninitialized dynptr
(ARG_PTR_TO_DYNPTR | MEM_UNINIT), the stack slots at the frame pointer
offset where the dynptr resides are marked STACK_DYNPTR. For helper
functions that take in initialized dynptrs (e.g. bpf_dynptr_read and
bpf_dynptr_write, which are added later in this patchset), the verifier
enforces that the dynptr has been initialized properly by checking that
its corresponding stack slots have been marked as STACK_DYNPTR.

The 6th patch in this patchset adds test cases that the verifier should
successfully reject, such as attempting to use a dynptr after doing a
direct write into it inside the bpf program.

Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: David Vernet <void@manifault.com>
Link: https://lore.kernel.org/bpf/20220523210712.3641569-2-joannelkoong@gmail.com
  • Loading branch information
joannekoong authored and anakryiko committed May 23, 2022
1 parent 1ec5ee8 commit 97e03f5
Show file tree
Hide file tree
Showing 6 changed files with 243 additions and 3 deletions.
28 changes: 28 additions & 0 deletions include/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -392,10 +392,15 @@ enum bpf_type_flag {

MEM_UNINIT = BIT(7 + BPF_BASE_TYPE_BITS),

/* DYNPTR points to memory local to the bpf program. */
DYNPTR_TYPE_LOCAL = BIT(8 + BPF_BASE_TYPE_BITS),

__BPF_TYPE_FLAG_MAX,
__BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1,
};

#define DYNPTR_TYPE_FLAG_MASK DYNPTR_TYPE_LOCAL

/* Max number of base types. */
#define BPF_BASE_TYPE_LIMIT (1UL << BPF_BASE_TYPE_BITS)

Expand Down Expand Up @@ -438,6 +443,7 @@ enum bpf_arg_type {
ARG_PTR_TO_CONST_STR, /* pointer to a null terminated read-only string */
ARG_PTR_TO_TIMER, /* pointer to bpf_timer */
ARG_PTR_TO_KPTR, /* pointer to referenced kptr */
ARG_PTR_TO_DYNPTR, /* pointer to bpf_dynptr. See bpf_type_flag for dynptr type */
__BPF_ARG_TYPE_MAX,

/* Extended arg_types. */
Expand Down Expand Up @@ -2376,4 +2382,26 @@ int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
u32 **bin_buf, u32 num_args);
void bpf_bprintf_cleanup(void);

/* The kernel-internal implementation of the opaque uapi struct bpf_dynptr.
* It must stay the same size as struct bpf_dynptr (two 64-bit words).
*/
struct bpf_dynptr_kern {
/* Memory the dynptr refers to.
* NOTE(review): presumably the base address that @offset is applied to;
* confirm against the dynptr helpers.
*/
void *data;
/* Size represents the number of usable bytes of dynptr data.
* If for example the offset is at 4 for a local dynptr whose data is
* of type u64, the number of usable bytes is 4.
*
* The upper 8 bits are reserved for metadata, which leaves 24 bits for
* the size itself (so a dynptr can only describe less than 16 MB).
* The field is laid out as follows:
* Bits 0 - 23 = size
* Bits 24 - 30 = dynptr type
* Bit 31 = whether dynptr is read-only
*/
u32 size;
/* Offset into the data; see the usable-bytes example above. */
u32 offset;
} __aligned(8);

/* Kind of memory a dynptr refers to. The verifier records this per stack
* slot in bpf_reg_state.dynptr.type.
*/
enum bpf_dynptr_type {
/* No recognized dynptr type; this is what arg_to_dynptr_type() returns
* when the argument carries no known DYNPTR_TYPE_* flag.
*/
BPF_DYNPTR_TYPE_INVALID,
/* Points to memory that is local to the bpf program */
BPF_DYNPTR_TYPE_LOCAL,
};

#endif /* _LINUX_BPF_H */
18 changes: 18 additions & 0 deletions include/linux/bpf_verifier.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,18 @@ struct bpf_reg_state {

u32 mem_size; /* for PTR_TO_MEM | PTR_TO_MEM_OR_NULL */

/* For dynptr stack slots */
struct {
enum bpf_dynptr_type type;
/* A dynptr is 16 bytes so it takes up 2 stack slots.
* We need to track which slot is the first slot
* to protect against cases where the user may try to
* pass in an address starting at the second slot of the
* dynptr.
*/
bool first_slot;
} dynptr;

/* Max size from any of the above. */
struct {
unsigned long raw1;
Expand Down Expand Up @@ -174,9 +186,15 @@ enum bpf_stack_slot_type {
STACK_SPILL, /* register spilled into stack */
STACK_MISC, /* BPF program wrote some data into this slot */
STACK_ZERO, /* BPF program wrote constant zero */
/* A dynptr is stored in this stack slot. The type of dynptr
* is stored in bpf_stack_state->spilled_ptr.dynptr.type
*/
STACK_DYNPTR,
};

#define BPF_REG_SIZE 8 /* size of eBPF register in bytes */
/* Size in bytes of the kernel-side dynptr representation. */
#define BPF_DYNPTR_SIZE sizeof(struct bpf_dynptr_kern)
/* Number of BPF_REG_SIZE-byte stack slots that one dynptr occupies. */
#define BPF_DYNPTR_NR_SLOTS (BPF_DYNPTR_SIZE / BPF_REG_SIZE)

struct bpf_stack_state {
struct bpf_reg_state spilled_ptr;
Expand Down
5 changes: 5 additions & 0 deletions include/uapi/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -6528,6 +6528,11 @@ struct bpf_timer {
__u64 :64;
} __attribute__((aligned(8)));

/* Opaque dynamic pointer handle as seen by bpf programs: two anonymous
* 64-bit fields (16 bytes in total) with no accessible members.
*/
struct bpf_dynptr {
__u64 :64;
__u64 :64;
} __attribute__((aligned(8)));

struct bpf_sysctl {
__u32 write; /* Sysctl is being read (= 0) or written (= 1).
* Allows 1,2,4-byte read, but no write.
Expand Down
188 changes: 185 additions & 3 deletions kernel/bpf/verifier.c
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,7 @@ struct bpf_call_arg_meta {
u32 ret_btf_id;
u32 subprogno;
struct bpf_map_value_off_desc *kptr_off_desc;
u8 uninit_dynptr_regno;
};

struct btf *btf_vmlinux;
Expand Down Expand Up @@ -581,6 +582,7 @@ static char slot_type_char[] = {
[STACK_SPILL] = 'r',
[STACK_MISC] = 'm',
[STACK_ZERO] = '0',
[STACK_DYNPTR] = 'd',
};

static void print_liveness(struct bpf_verifier_env *env,
Expand All @@ -596,6 +598,25 @@ static void print_liveness(struct bpf_verifier_env *env,
verbose(env, "D");
}

/* Translate a frame-pointer-relative stack offset into a stack slot index
 * (spi). Each slot is BPF_REG_SIZE bytes wide: offsets -1 .. -BPF_REG_SIZE
 * map to slot 0, the next BPF_REG_SIZE bytes below that to slot 1, and so on.
 */
static int get_spi(s32 off)
{
	/* zero-based distance of the addressed byte below the frame pointer */
	int bytes_below_fp = -off - 1;

	return bytes_below_fp / BPF_REG_SIZE;
}

/* Check that a run of @nr_slots stack slots whose highest index is @spi lies
 * entirely inside the stack currently allocated for @state.
 *
 * Slot indices grow downwards: an object that spans several slots has its
 * first slot at @spi and continues at @spi - 1, @spi - 2, ... (a dynptr, for
 * example, uses @spi and @spi - 1). The run therefore covers
 * [@spi - @nr_slots + 1, @spi], and every index in it has to fall within
 * [0, allocated_stack / BPF_REG_SIZE).
 */
static bool is_spi_bounds_valid(struct bpf_func_state *state, int spi, int nr_slots)
{
	int lowest_spi = spi - nr_slots + 1;
	int slot_count;

	/* the run would extend past slot 0 */
	if (lowest_spi < 0)
		return false;

	slot_count = state->allocated_stack / BPF_REG_SIZE;

	return spi < slot_count;
}

static struct bpf_func_state *func(struct bpf_verifier_env *env,
const struct bpf_reg_state *reg)
{
Expand Down Expand Up @@ -647,6 +668,108 @@ static void mark_verifier_state_scratched(struct bpf_verifier_env *env)
env->scratched_stack_slots = ~0ULL;
}

/* Map the DYNPTR_TYPE_* flag carried by a helper argument type to the
 * corresponding bpf_dynptr_type. Returns BPF_DYNPTR_TYPE_INVALID when the
 * argument type carries no recognized dynptr type flag.
 */
static enum bpf_dynptr_type arg_to_dynptr_type(enum bpf_arg_type arg_type)
{
	if ((arg_type & DYNPTR_TYPE_FLAG_MASK) == DYNPTR_TYPE_LOCAL)
		return BPF_DYNPTR_TYPE_LOCAL;

	return BPF_DYNPTR_TYPE_INVALID;
}

/* Record in the verifier's stack state that a dynptr now lives in the two
 * stack slots addressed by @reg.
 *
 * @env:      verifier environment, used to find the function state of @reg
 * @reg:      stack pointer register holding the address of the dynptr
 * @arg_type: helper argument type; its DYNPTR_TYPE_* flag selects the dynptr
 *            type that is stored in the slots
 * @insn_idx: index of the calling instruction (not used by this function)
 *
 * Returns 0 on success, or -EINVAL if the two slots are not within the
 * allocated stack or @arg_type carries no known dynptr type flag. On failure
 * the stack state is left untouched.
 */
static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
				   enum bpf_arg_type arg_type, int insn_idx)
{
	struct bpf_func_state *state = func(env, reg);
	enum bpf_dynptr_type type;
	int spi, i;

	spi = get_spi(reg->off);

	if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS))
		return -EINVAL;

	/* Validate the dynptr type before touching any slot, so that an error
	 * return never leaves the slots marked STACK_DYNPTR without a type.
	 */
	type = arg_to_dynptr_type(arg_type);
	if (type == BPF_DYNPTR_TYPE_INVALID)
		return -EINVAL;

	for (i = 0; i < BPF_REG_SIZE; i++) {
		state->stack[spi].slot_type[i] = STACK_DYNPTR;
		state->stack[spi - 1].slot_type[i] = STACK_DYNPTR;
	}

	state->stack[spi].spilled_ptr.dynptr.first_slot = true;
	/* Explicitly clear the flag on the second slot: a stale 'true' left
	 * over from an earlier use of that slot must not make the second half
	 * of this dynptr look like the start of one.
	 */
	state->stack[spi - 1].spilled_ptr.dynptr.first_slot = false;
	state->stack[spi].spilled_ptr.dynptr.type = type;
	state->stack[spi - 1].spilled_ptr.dynptr.type = type;

	return 0;
}

/* Undo mark_stack_slots_dynptr(): forget the dynptr stored in the two stack
 * slots addressed by @reg.
 *
 * Both slots are reset to STACK_INVALID and their dynptr bookkeeping (type
 * and first-slot flag) is cleared. Returns 0 on success, or -EINVAL if the
 * two slots are not within the allocated stack.
 */
static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
	struct bpf_func_state *state = func(env, reg);
	int first_spi, second_spi, byte;

	first_spi = get_spi(reg->off);
	if (!is_spi_bounds_valid(state, first_spi, BPF_DYNPTR_NR_SLOTS))
		return -EINVAL;

	/* slot indices grow downwards, so the second half sits one index lower */
	second_spi = first_spi - 1;

	state->stack[first_spi].spilled_ptr.dynptr.first_slot = false;
	state->stack[first_spi].spilled_ptr.dynptr.type = 0;
	state->stack[second_spi].spilled_ptr.dynptr.type = 0;

	for (byte = 0; byte < BPF_REG_SIZE; byte++) {
		state->stack[first_spi].slot_type[byte] = STACK_INVALID;
		state->stack[second_spi].slot_type[byte] = STACK_INVALID;
	}

	return 0;
}

/* Decide whether the stack location addressed by @reg may be handed to a
 * helper as an uninitialized dynptr, i.e. whether no byte of the two slots
 * it would occupy is currently marked STACK_DYNPTR.
 *
 * Slots that are not within the currently allocated stack are reported as
 * valid here; this function does not itself reject out-of-bounds locations.
 */
static bool is_dynptr_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
	struct bpf_func_state *state = func(env, reg);
	int spi = get_spi(reg->off);
	int byte;

	if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS))
		return true;

	for (byte = 0; byte < BPF_REG_SIZE; byte++) {
		/* any byte already belonging to a dynptr disqualifies the spot */
		if (state->stack[spi].slot_type[byte] == STACK_DYNPTR)
			return false;
		if (state->stack[spi - 1].slot_type[byte] == STACK_DYNPTR)
			return false;
	}

	return true;
}

/* Decide whether @reg points at a properly initialized dynptr that is
 * acceptable for a helper argument of type @arg_type.
 *
 * The address must be the first of two in-bounds stack slots, every byte of
 * both slots must be marked STACK_DYNPTR, and, unless @arg_type is the plain
 * ARG_PTR_TO_DYNPTR (which accepts any dynptr type), the recorded dynptr type
 * must match the one requested by @arg_type.
 */
static bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
				     enum bpf_arg_type arg_type)
{
	struct bpf_func_state *state = func(env, reg);
	int spi = get_spi(reg->off);
	int byte;

	/* the bounds check must come first: it guards every stack[] access below */
	if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS))
		return false;

	/* reject addresses that point at the second half of a dynptr */
	if (!state->stack[spi].spilled_ptr.dynptr.first_slot)
		return false;

	for (byte = 0; byte < BPF_REG_SIZE; byte++) {
		if (state->stack[spi].slot_type[byte] != STACK_DYNPTR)
			return false;
		if (state->stack[spi - 1].slot_type[byte] != STACK_DYNPTR)
			return false;
	}

	/* ARG_PTR_TO_DYNPTR takes any type of dynptr */
	if (arg_type == ARG_PTR_TO_DYNPTR)
		return true;

	if (state->stack[spi].spilled_ptr.dynptr.type != arg_to_dynptr_type(arg_type))
		return false;

	return true;
}

/* The reg state of a pointer or a bounded scalar was saved when
* it was spilled to the stack.
*/
Expand Down Expand Up @@ -5400,6 +5523,11 @@ static bool arg_type_is_release(enum bpf_arg_type type)
return type & OBJ_RELEASE;
}

/* Report whether @type, once reduced to its base type by base_type(), is
 * ARG_PTR_TO_DYNPTR.
 */
static bool arg_type_is_dynptr(enum bpf_arg_type type)
{
	if (base_type(type) != ARG_PTR_TO_DYNPTR)
		return false;

	return true;
}

static int int_ptr_type_to_size(enum bpf_arg_type type)
{
if (type == ARG_PTR_TO_INT)
Expand Down Expand Up @@ -5539,6 +5667,7 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
[ARG_PTR_TO_CONST_STR] = &const_str_ptr_types,
[ARG_PTR_TO_TIMER] = &timer_types,
[ARG_PTR_TO_KPTR] = &kptr_types,
[ARG_PTR_TO_DYNPTR] = &stack_ptr_types,
};

static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
Expand Down Expand Up @@ -5628,8 +5757,13 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env,
bool fixed_off_ok = false;

switch ((u32)type) {
case SCALAR_VALUE:
/* Pointer types where reg offset is explicitly allowed: */
case PTR_TO_STACK:
if (arg_type_is_dynptr(arg_type) && reg->off % BPF_REG_SIZE) {
verbose(env, "cannot pass in dynptr at an offset\n");
return -EINVAL;
}
fallthrough;
case PTR_TO_PACKET:
case PTR_TO_PACKET_META:
case PTR_TO_MAP_KEY:
Expand All @@ -5639,7 +5773,7 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env,
case PTR_TO_MEM | MEM_ALLOC:
case PTR_TO_BUF:
case PTR_TO_BUF | MEM_RDONLY:
case PTR_TO_STACK:
case SCALAR_VALUE:
/* Some of the argument types nevertheless require a
* zero register offset.
*/
Expand Down Expand Up @@ -5837,6 +5971,36 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);

err = check_mem_size_reg(env, reg, regno, zero_size_allowed, meta);
} else if (arg_type_is_dynptr(arg_type)) {
if (arg_type & MEM_UNINIT) {
if (!is_dynptr_reg_valid_uninit(env, reg)) {
verbose(env, "Dynptr has to be an uninitialized dynptr\n");
return -EINVAL;
}

/* We only support one dynptr being uninitialized at the moment,
* which is sufficient for the helper functions we have right now.
*/
if (meta->uninit_dynptr_regno) {
verbose(env, "verifier internal error: multiple uninitialized dynptr args\n");
return -EFAULT;
}

meta->uninit_dynptr_regno = regno;
} else if (!is_dynptr_reg_valid_init(env, reg, arg_type)) {
const char *err_extra = "";

switch (arg_type & DYNPTR_TYPE_FLAG_MASK) {
case DYNPTR_TYPE_LOCAL:
err_extra = "local ";
break;
default:
break;
}
verbose(env, "Expected an initialized %sdynptr as arg #%d\n",
err_extra, arg + 1);
return -EINVAL;
}
} else if (arg_type_is_alloc_size(arg_type)) {
if (!tnum_is_const(reg->var_off)) {
verbose(env, "R%d is not a known constant'\n",
Expand Down Expand Up @@ -6970,9 +7134,27 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn

regs = cur_regs(env);

if (meta.uninit_dynptr_regno) {
/* we write BPF_DW bits (8 bytes) at a time */
for (i = 0; i < BPF_DYNPTR_SIZE; i += 8) {
err = check_mem_access(env, insn_idx, meta.uninit_dynptr_regno,
i, BPF_DW, BPF_WRITE, -1, false);
if (err)
return err;
}

err = mark_stack_slots_dynptr(env, &regs[meta.uninit_dynptr_regno],
fn->arg_type[meta.uninit_dynptr_regno - BPF_REG_1],
insn_idx);
if (err)
return err;
}

if (meta.release_regno) {
err = -EINVAL;
if (meta.ref_obj_id)
if (arg_type_is_dynptr(fn->arg_type[meta.release_regno - BPF_REG_1]))
err = unmark_stack_slots_dynptr(env, &regs[meta.release_regno]);
else if (meta.ref_obj_id)
err = release_reference(env, meta.ref_obj_id);
/* meta.ref_obj_id can only be 0 if register that is meant to be
* released is NULL, which must be > R0.
Expand Down
2 changes: 2 additions & 0 deletions scripts/bpf_doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -634,6 +634,7 @@ def __init__(self, parser):
'struct file',
'struct bpf_timer',
'struct mptcp_sock',
'struct bpf_dynptr',
]
known_types = {
'...',
Expand Down Expand Up @@ -684,6 +685,7 @@ def __init__(self, parser):
'struct file',
'struct bpf_timer',
'struct mptcp_sock',
'struct bpf_dynptr',
}
mapped_types = {
'u8': '__u8',
Expand Down
5 changes: 5 additions & 0 deletions tools/include/uapi/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -6528,6 +6528,11 @@ struct bpf_timer {
__u64 :64;
} __attribute__((aligned(8)));

/* Opaque dynamic pointer handle as seen by bpf programs: two anonymous
* 64-bit fields (16 bytes in total) with no accessible members.
*/
struct bpf_dynptr {
__u64 :64;
__u64 :64;
} __attribute__((aligned(8)));

struct bpf_sysctl {
__u32 write; /* Sysctl is being read (= 0) or written (= 1).
* Allows 1,2,4-byte read, but no write.
Expand Down

0 comments on commit 97e03f5

Please sign in to comment.