Skip to content

Commit d8eca5b

Browse files
borkmann authored and
Alexei Starovoitov committed
bpf: implement lookup-free direct value access for maps
This generic extension to BPF maps allows for directly loading an address residing inside a BPF map value with a single BPF ldimm64 instruction! The idea is similar to what BPF_PSEUDO_MAP_FD does today, which is a special src_reg flag for the ldimm64 instruction indicating that the first part of the double insn's imm field holds a file descriptor, which the verifier then replaces with the full 64-bit address of the map, spread across both imm parts. For the newly added BPF_PSEUDO_MAP_VALUE src_reg flag, the idea is the following: the first part of the double insn's imm field is again a file descriptor corresponding to the map, and the second part of the imm field is an offset into the value. The verifier will then replace both imm parts with an address that points into the BPF map value at the given value offset for maps that support this operation. Currently, only array maps with a single entry are supported. It is possible to support more than just a single map element by reusing both 16-bit off fields of the insns as a map index, so a full array map lookup could be expressed that way. It hasn't been implemented here due to the lack of a concrete use case, but could easily be done in the future in a compatible way, since both off fields right now have to be 0 and would correctly denote map index 0. BPF_PSEUDO_MAP_VALUE is a distinct flag because otherwise, with BPF_PSEUDO_MAP_FD, we could not distinguish a load of the map pointer from a load of the map's value at offset 0, and changing BPF_PSEUDO_MAP_FD's encoding to be off by one in order to distinguish a regular map pointer from a map value pointer would add unnecessary complexity and raise the barrier for debuggability, and is thus less suitable. Using the second part of the imm field as an offset into the value does /not/ come with limitations, since the maximum possible value size fits in a u32 anyway.
This optimization allows for efficiently retrieving an address to a map value memory area without having to issue a helper call, which needs to prepare registers according to the calling convention, etc., without needing the extra NULL test, and without having to add the offset to the value base pointer in an additional instruction. The verifier then treats the destination register as PTR_TO_MAP_VALUE with a constant reg->off taken from the user-passed offset in the second imm field, and guarantees that this is within the bounds of the map value. Any subsequent operations are treated as typical map value handling, without anything extra needed from the verification side. The two map operations for direct value access have been added to the array map for now. In the future, other map types could be supported as well, depending on the use case. The main use case for this commit is to allow BPF loader support for global variables that reside in .data/.rodata/.bss sections, such that we can directly load their addresses with minimal additional infrastructure required. Loader support has been added in subsequent commits for the libbpf library. Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
1 parent ff466b5 commit d8eca5b

File tree

9 files changed

+149
-31
lines changed

9 files changed

+149
-31
lines changed

include/linux/bpf.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,12 @@ struct bpf_map_ops {
5757
const struct btf *btf,
5858
const struct btf_type *key_type,
5959
const struct btf_type *value_type);
60+
61+
/* Direct value access helpers. */
62+
int (*map_direct_value_addr)(const struct bpf_map *map,
63+
u64 *imm, u32 off);
64+
int (*map_direct_value_meta)(const struct bpf_map *map,
65+
u64 imm, u32 *off);
6066
};
6167

6268
struct bpf_map {

include/linux/bpf_verifier.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,10 @@ struct bpf_insn_aux_data {
224224
unsigned long map_state; /* pointer/poison value for maps */
225225
s32 call_imm; /* saved imm field of call insn */
226226
u32 alu_limit; /* limit for add/sub register with pointer */
227+
struct {
228+
u32 map_index; /* index into used_maps[] */
229+
u32 map_off; /* offset from value base address */
230+
};
227231
};
228232
int ctx_field_size; /* the ctx field size for load insn, maybe 0 */
229233
int sanitize_stack_off; /* stack slot to be cleared */

include/uapi/linux/bpf.h

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -255,8 +255,19 @@ enum bpf_attach_type {
255255
*/
256256
#define BPF_F_ANY_ALIGNMENT (1U << 1)
257257

258-
/* when bpf_ldimm64->src_reg == BPF_PSEUDO_MAP_FD, bpf_ldimm64->imm == fd */
258+
/* When BPF ldimm64's insn[0].src_reg != 0 then this can have
259+
* two extensions:
260+
*
261+
* insn[0].src_reg: BPF_PSEUDO_MAP_FD BPF_PSEUDO_MAP_VALUE
262+
* insn[0].imm: map fd map fd
263+
* insn[1].imm: 0 offset into value
264+
* insn[0].off: 0 0
265+
* insn[1].off: 0 0
266+
* ldimm64 rewrite: address of map address of map[0]+offset
267+
* verifier type: CONST_PTR_TO_MAP PTR_TO_MAP_VALUE
268+
*/
259269
#define BPF_PSEUDO_MAP_FD 1
270+
#define BPF_PSEUDO_MAP_VALUE 2
260271

261272
/* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
262273
* offset to another bpf function

kernel/bpf/arraymap.c

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,36 @@ static void *array_map_lookup_elem(struct bpf_map *map, void *key)
160160
return array->value + array->elem_size * (index & array->index_mask);
161161
}
162162

163+
static int array_map_direct_value_addr(const struct bpf_map *map, u64 *imm,
164+
u32 off)
165+
{
166+
struct bpf_array *array = container_of(map, struct bpf_array, map);
167+
168+
if (map->max_entries != 1)
169+
return -ENOTSUPP;
170+
if (off >= map->value_size)
171+
return -EINVAL;
172+
173+
*imm = (unsigned long)array->value;
174+
return 0;
175+
}
176+
177+
static int array_map_direct_value_meta(const struct bpf_map *map, u64 imm,
178+
u32 *off)
179+
{
180+
struct bpf_array *array = container_of(map, struct bpf_array, map);
181+
u64 base = (unsigned long)array->value;
182+
u64 range = array->elem_size;
183+
184+
if (map->max_entries != 1)
185+
return -ENOTSUPP;
186+
if (imm < base || imm >= base + range)
187+
return -ENOENT;
188+
189+
*off = imm - base;
190+
return 0;
191+
}
192+
163193
/* emit BPF instructions equivalent to C code of array_map_lookup_elem() */
164194
static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
165195
{
@@ -419,6 +449,8 @@ const struct bpf_map_ops array_map_ops = {
419449
.map_update_elem = array_map_update_elem,
420450
.map_delete_elem = array_map_delete_elem,
421451
.map_gen_lookup = array_map_gen_lookup,
452+
.map_direct_value_addr = array_map_direct_value_addr,
453+
.map_direct_value_meta = array_map_direct_value_meta,
422454
.map_seq_show_elem = array_map_seq_show_elem,
423455
.map_check_btf = array_map_check_btf,
424456
};

kernel/bpf/core.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -292,7 +292,8 @@ int bpf_prog_calc_tag(struct bpf_prog *fp)
292292
dst[i] = fp->insnsi[i];
293293
if (!was_ld_map &&
294294
dst[i].code == (BPF_LD | BPF_IMM | BPF_DW) &&
295-
dst[i].src_reg == BPF_PSEUDO_MAP_FD) {
295+
(dst[i].src_reg == BPF_PSEUDO_MAP_FD ||
296+
dst[i].src_reg == BPF_PSEUDO_MAP_VALUE)) {
296297
was_ld_map = true;
297298
dst[i].imm = 0;
298299
} else if (was_ld_map &&

kernel/bpf/disasm.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -205,10 +205,11 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
205205
* part of the ldimm64 insn is accessible.
206206
*/
207207
u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
208-
bool map_ptr = insn->src_reg == BPF_PSEUDO_MAP_FD;
208+
bool is_ptr = insn->src_reg == BPF_PSEUDO_MAP_FD ||
209+
insn->src_reg == BPF_PSEUDO_MAP_VALUE;
209210
char tmp[64];
210211

211-
if (map_ptr && !allow_ptr_leaks)
212+
if (is_ptr && !allow_ptr_leaks)
212213
imm = 0;
213214

214215
verbose(cbs->private_data, "(%02x) r%d = %s\n",

kernel/bpf/syscall.c

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2072,20 +2072,34 @@ static int bpf_map_get_fd_by_id(const union bpf_attr *attr)
20722072
}
20732073

20742074
static const struct bpf_map *bpf_map_from_imm(const struct bpf_prog *prog,
2075-
unsigned long addr)
2075+
unsigned long addr, u32 *off,
2076+
u32 *type)
20762077
{
2078+
const struct bpf_map *map;
20772079
int i;
20782080

2079-
for (i = 0; i < prog->aux->used_map_cnt; i++)
2080-
if (prog->aux->used_maps[i] == (void *)addr)
2081-
return prog->aux->used_maps[i];
2081+
for (i = 0, *off = 0; i < prog->aux->used_map_cnt; i++) {
2082+
map = prog->aux->used_maps[i];
2083+
if (map == (void *)addr) {
2084+
*type = BPF_PSEUDO_MAP_FD;
2085+
return map;
2086+
}
2087+
if (!map->ops->map_direct_value_meta)
2088+
continue;
2089+
if (!map->ops->map_direct_value_meta(map, addr, off)) {
2090+
*type = BPF_PSEUDO_MAP_VALUE;
2091+
return map;
2092+
}
2093+
}
2094+
20822095
return NULL;
20832096
}
20842097

20852098
static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog)
20862099
{
20872100
const struct bpf_map *map;
20882101
struct bpf_insn *insns;
2102+
u32 off, type;
20892103
u64 imm;
20902104
int i;
20912105

@@ -2113,11 +2127,11 @@ static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog)
21132127
continue;
21142128

21152129
imm = ((u64)insns[i + 1].imm << 32) | (u32)insns[i].imm;
2116-
map = bpf_map_from_imm(prog, imm);
2130+
map = bpf_map_from_imm(prog, imm, &off, &type);
21172131
if (map) {
2118-
insns[i].src_reg = BPF_PSEUDO_MAP_FD;
2132+
insns[i].src_reg = type;
21192133
insns[i].imm = map->id;
2120-
insns[i + 1].imm = 0;
2134+
insns[i + 1].imm = off;
21212135
continue;
21222136
}
21232137
}

kernel/bpf/verifier.c

Lines changed: 66 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -5056,18 +5056,12 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
50565056
return 0;
50575057
}
50585058

5059-
/* return the map pointer stored inside BPF_LD_IMM64 instruction */
5060-
static struct bpf_map *ld_imm64_to_map_ptr(struct bpf_insn *insn)
5061-
{
5062-
u64 imm64 = ((u64) (u32) insn[0].imm) | ((u64) (u32) insn[1].imm) << 32;
5063-
5064-
return (struct bpf_map *) (unsigned long) imm64;
5065-
}
5066-
50675059
/* verify BPF_LD_IMM64 instruction */
50685060
static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
50695061
{
5062+
struct bpf_insn_aux_data *aux = cur_aux(env);
50705063
struct bpf_reg_state *regs = cur_regs(env);
5064+
struct bpf_map *map;
50715065
int err;
50725066

50735067
if (BPF_SIZE(insn->code) != BPF_DW) {
@@ -5091,11 +5085,22 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
50915085
return 0;
50925086
}
50935087

5094-
/* replace_map_fd_with_map_ptr() should have caught bad ld_imm64 */
5095-
BUG_ON(insn->src_reg != BPF_PSEUDO_MAP_FD);
5088+
map = env->used_maps[aux->map_index];
5089+
mark_reg_known_zero(env, regs, insn->dst_reg);
5090+
regs[insn->dst_reg].map_ptr = map;
5091+
5092+
if (insn->src_reg == BPF_PSEUDO_MAP_VALUE) {
5093+
regs[insn->dst_reg].type = PTR_TO_MAP_VALUE;
5094+
regs[insn->dst_reg].off = aux->map_off;
5095+
if (map_value_has_spin_lock(map))
5096+
regs[insn->dst_reg].id = ++env->id_gen;
5097+
} else if (insn->src_reg == BPF_PSEUDO_MAP_FD) {
5098+
regs[insn->dst_reg].type = CONST_PTR_TO_MAP;
5099+
} else {
5100+
verbose(env, "bpf verifier is misconfigured\n");
5101+
return -EINVAL;
5102+
}
50965103

5097-
regs[insn->dst_reg].type = CONST_PTR_TO_MAP;
5098-
regs[insn->dst_reg].map_ptr = ld_imm64_to_map_ptr(insn);
50995104
return 0;
51005105
}
51015106

@@ -6803,8 +6808,10 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
68036808
}
68046809

68056810
if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
6811+
struct bpf_insn_aux_data *aux;
68066812
struct bpf_map *map;
68076813
struct fd f;
6814+
u64 addr;
68086815

68096816
if (i == insn_cnt - 1 || insn[1].code != 0 ||
68106817
insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
@@ -6813,13 +6820,19 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
68136820
return -EINVAL;
68146821
}
68156822

6816-
if (insn->src_reg == 0)
6823+
if (insn[0].src_reg == 0)
68176824
/* valid generic load 64-bit imm */
68186825
goto next_insn;
68196826

6820-
if (insn[0].src_reg != BPF_PSEUDO_MAP_FD ||
6821-
insn[1].imm != 0) {
6822-
verbose(env, "unrecognized bpf_ld_imm64 insn\n");
6827+
/* In final convert_pseudo_ld_imm64() step, this is
6828+
* converted into regular 64-bit imm load insn.
6829+
*/
6830+
if ((insn[0].src_reg != BPF_PSEUDO_MAP_FD &&
6831+
insn[0].src_reg != BPF_PSEUDO_MAP_VALUE) ||
6832+
(insn[0].src_reg == BPF_PSEUDO_MAP_FD &&
6833+
insn[1].imm != 0)) {
6834+
verbose(env,
6835+
"unrecognized bpf_ld_imm64 insn\n");
68236836
return -EINVAL;
68246837
}
68256838

@@ -6837,16 +6850,47 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
68376850
return err;
68386851
}
68396852

6840-
/* store map pointer inside BPF_LD_IMM64 instruction */
6841-
insn[0].imm = (u32) (unsigned long) map;
6842-
insn[1].imm = ((u64) (unsigned long) map) >> 32;
6853+
aux = &env->insn_aux_data[i];
6854+
if (insn->src_reg == BPF_PSEUDO_MAP_FD) {
6855+
addr = (unsigned long)map;
6856+
} else {
6857+
u32 off = insn[1].imm;
6858+
6859+
if (off >= BPF_MAX_VAR_OFF) {
6860+
verbose(env, "direct value offset of %u is not allowed\n", off);
6861+
fdput(f);
6862+
return -EINVAL;
6863+
}
6864+
6865+
if (!map->ops->map_direct_value_addr) {
6866+
verbose(env, "no direct value access support for this map type\n");
6867+
fdput(f);
6868+
return -EINVAL;
6869+
}
6870+
6871+
err = map->ops->map_direct_value_addr(map, &addr, off);
6872+
if (err) {
6873+
verbose(env, "invalid access to map value pointer, value_size=%u off=%u\n",
6874+
map->value_size, off);
6875+
fdput(f);
6876+
return err;
6877+
}
6878+
6879+
aux->map_off = off;
6880+
addr += off;
6881+
}
6882+
6883+
insn[0].imm = (u32)addr;
6884+
insn[1].imm = addr >> 32;
68436885

68446886
/* check whether we recorded this map already */
6845-
for (j = 0; j < env->used_map_cnt; j++)
6887+
for (j = 0; j < env->used_map_cnt; j++) {
68466888
if (env->used_maps[j] == map) {
6889+
aux->map_index = j;
68476890
fdput(f);
68486891
goto next_insn;
68496892
}
6893+
}
68506894

68516895
if (env->used_map_cnt >= MAX_USED_MAPS) {
68526896
fdput(f);
@@ -6863,6 +6907,8 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
68636907
fdput(f);
68646908
return PTR_ERR(map);
68656909
}
6910+
6911+
aux->map_index = env->used_map_cnt;
68666912
env->used_maps[env->used_map_cnt++] = map;
68676913

68686914
if (bpf_map_is_cgroup_storage(map) &&

tools/bpf/bpftool/xlated_dumper.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,9 @@ static const char *print_imm(void *private_data,
195195
if (insn->src_reg == BPF_PSEUDO_MAP_FD)
196196
snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
197197
"map[id:%u]", insn->imm);
198+
else if (insn->src_reg == BPF_PSEUDO_MAP_VALUE)
199+
snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
200+
"map[id:%u][0]+%u", insn->imm, (insn + 1)->imm);
198201
else
199202
snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
200203
"0x%llx", (unsigned long long)full_imm);

0 commit comments

Comments
 (0)