bpf: prevent out-of-bounds speculation
Under speculation, CPUs may mis-predict branches in bounds checks. Thus,
memory accesses under a bounds check may be speculated even if the
bounds check fails, providing a primitive for building a side channel.

To avoid leaking kernel data, round up the size of array-based maps and
mask the index after the bounds check, so that a speculated load with an
out-of-bounds index will read either a valid value from the array or
zero from the padded area.
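
As an illustration (not taken from this patch), the masking pattern in
plain C, with made-up sizes:

  #define MAX_ENTRIES 5                     /* hypothetical map size     */
  #define INDEX_MASK  7                     /* roundup_pow_of_two(5) - 1 */

  /* 8 slots: 5 real elements plus zero-initialized padding */
  static unsigned long padded_array[INDEX_MASK + 1];

  unsigned long lookup(unsigned int index)
  {
          if (index >= MAX_ENTRIES)
                  return 0;
          /* Even if the branch above is mis-predicted, the masked index
           * stays inside the 8-slot allocation, so speculation can only
           * read a valid element or one of the zeroed padding slots.
           */
          return padded_array[index & INDEX_MASK];
  }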

Unconditionally mask the index for all array types, even though
max_entries is not rounded up to a power of 2 for the root user.
When a map is created by an unprivileged user, generate a sequence of
bpf insns that includes an AND operation, to make sure that the JITed
code performs the same 'index & index_mask' operation.
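
As a worked example (hypothetical numbers, mirroring the sizing logic
added to array_map_alloc() below):

  u32 max_entries = 200;                           /* requested by the user */
  u32 index_mask  = roundup_pow_of_two(200) - 1;   /* = 255                 */

  if (unpriv)
          /* allocate 256 elements so 'index & 255' stays inside the map */
          max_entries = index_mask + 1;
  /* for root the allocation stays at 200 elements, but lookups still
   * apply 'index & index_mask', which leaves in-bounds indexes unchanged
   */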

If a prog_array map is created by an unprivileged user, replace
  bpf_tail_call(ctx, map, index);
with
  if (index >= max_entries)
    goto out;
  index &= map->index_mask;
  bpf_tail_call(ctx, map, index);
(along with rounding max_entries up to a power of 2) to prevent
out-of-bounds speculation.
There is a secondary, redundant 'if (index >= max_entries)' check in the
interpreter and in all JITs, but it can be optimized out later if necessary.
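
Concretely, fixup_bpf_calls() below implements this by prepending two
instructions to the patched tail call (R3 holds the index):

  insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3, map_ptr->max_entries, 2);
  insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
                              container_of(map_ptr, struct bpf_array,
                                           map)->index_mask);
  insn_buf[2] = *insn;    /* the original BPF_TAIL_CALL instruction */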

Other array-like maps (cpumap, devmap, sockmap, perf_event_array,
cgroup_array) cannot be used by unprivileged users, so no changes are
needed there.

This fixes the bpf side of "Variant 1: bounds check bypass (CVE-2017-5753)"
on all architectures, with and without JIT.

v2->v3:
Daniel noticed that the attack can potentially be crafted via syscall
commands without loading a program, so masking is added to those paths
as well.
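
For context, a purely illustrative userspace sketch (not part of the
patch; the wrapper name and includes are assumptions): a lookup issued
via the bpf() syscall reaches array_map_lookup_elem(), which now masks
the index after its bounds check.

  #include <linux/bpf.h>
  #include <stdint.h>
  #include <sys/syscall.h>
  #include <unistd.h>

  /* hypothetical wrapper: look up one element through the syscall path */
  static long bpf_map_lookup(int map_fd, const uint32_t *key, void *value)
  {
          union bpf_attr attr = {
                  .map_fd = (uint32_t)map_fd,
                  .key    = (uint64_t)(unsigned long)key,
                  .value  = (uint64_t)(unsigned long)value,
          };

          /* for array maps this ends up in array_map_lookup_elem() */
          return syscall(__NR_bpf, BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
  }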

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Alexei Starovoitov authored and borkmann committed Jan 8, 2018
Commit b215739 (parent 2b36047)
Showing 3 changed files with 74 additions and 11 deletions.
2 changes: 2 additions & 0 deletions include/linux/bpf.h
@@ -52,6 +52,7 @@ struct bpf_map {
u32 pages;
u32 id;
int numa_node;
bool unpriv_array;
struct user_struct *user;
const struct bpf_map_ops *ops;
struct work_struct work;
@@ -221,6 +222,7 @@ struct bpf_prog_aux {
struct bpf_array {
struct bpf_map map;
u32 elem_size;
u32 index_mask;
/* 'ownership' of prog_array is claimed by the first program that
* is going to use this map or by the first program which FD is stored
* in the map to make sure that all callers and callees have the same
47 changes: 36 additions & 11 deletions kernel/bpf/arraymap.c
@@ -53,9 +53,10 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
{
bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
int numa_node = bpf_map_attr_numa_node(attr);
u32 elem_size, index_mask, max_entries;
bool unpriv = !capable(CAP_SYS_ADMIN);
struct bpf_array *array;
u64 array_size;
u32 elem_size;

/* check sanity of attributes */
if (attr->max_entries == 0 || attr->key_size != 4 ||
@@ -72,11 +73,20 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)

elem_size = round_up(attr->value_size, 8);

max_entries = attr->max_entries;
index_mask = roundup_pow_of_two(max_entries) - 1;

if (unpriv)
/* round up array size to nearest power of 2,
* since cpu will speculate within index_mask limits
*/
max_entries = index_mask + 1;

array_size = sizeof(*array);
if (percpu)
array_size += (u64) attr->max_entries * sizeof(void *);
array_size += (u64) max_entries * sizeof(void *);
else
array_size += (u64) attr->max_entries * elem_size;
array_size += (u64) max_entries * elem_size;

/* make sure there is no u32 overflow later in round_up() */
if (array_size >= U32_MAX - PAGE_SIZE)
@@ -86,6 +96,8 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
array = bpf_map_area_alloc(array_size, numa_node);
if (!array)
return ERR_PTR(-ENOMEM);
array->index_mask = index_mask;
array->map.unpriv_array = unpriv;

/* copy mandatory map attributes */
array->map.map_type = attr->map_type;
@@ -121,12 +133,13 @@ static void *array_map_lookup_elem(struct bpf_map *map, void *key)
if (unlikely(index >= array->map.max_entries))
return NULL;

return array->value + array->elem_size * index;
return array->value + array->elem_size * (index & array->index_mask);
}

/* emit BPF instructions equivalent to C code of array_map_lookup_elem() */
static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
struct bpf_insn *insn = insn_buf;
u32 elem_size = round_up(map->value_size, 8);
const int ret = BPF_REG_0;
@@ -135,7 +148,12 @@ static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)

*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3);
if (map->unpriv_array) {
*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 4);
*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
} else {
*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3);
}

if (is_power_of_2(elem_size)) {
*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
@@ -157,7 +175,7 @@ static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key)
if (unlikely(index >= array->map.max_entries))
return NULL;

return this_cpu_ptr(array->pptrs[index]);
return this_cpu_ptr(array->pptrs[index & array->index_mask]);
}

int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
@@ -177,7 +195,7 @@ int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
*/
size = round_up(map->value_size, 8);
rcu_read_lock();
pptr = array->pptrs[index];
pptr = array->pptrs[index & array->index_mask];
for_each_possible_cpu(cpu) {
bpf_long_memcpy(value + off, per_cpu_ptr(pptr, cpu), size);
off += size;
@@ -225,10 +243,11 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
return -EEXIST;

if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
memcpy(this_cpu_ptr(array->pptrs[index]),
memcpy(this_cpu_ptr(array->pptrs[index & array->index_mask]),
value, map->value_size);
else
memcpy(array->value + array->elem_size * index,
memcpy(array->value +
array->elem_size * (index & array->index_mask),
value, map->value_size);
return 0;
}
@@ -262,7 +281,7 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
*/
size = round_up(map->value_size, 8);
rcu_read_lock();
pptr = array->pptrs[index];
pptr = array->pptrs[index & array->index_mask];
for_each_possible_cpu(cpu) {
bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value + off, size);
off += size;
@@ -613,6 +632,7 @@ static void *array_of_map_lookup_elem(struct bpf_map *map, void *key)
static u32 array_of_map_gen_lookup(struct bpf_map *map,
struct bpf_insn *insn_buf)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
u32 elem_size = round_up(map->value_size, 8);
struct bpf_insn *insn = insn_buf;
const int ret = BPF_REG_0;
@@ -621,7 +641,12 @@ static u32 array_of_map_gen_lookup(struct bpf_map *map,

*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5);
if (map->unpriv_array) {
*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 6);
*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
} else {
*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5);
}
if (is_power_of_2(elem_size))
*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
else
36 changes: 36 additions & 0 deletions kernel/bpf/verifier.c
@@ -1729,6 +1729,13 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &meta);
if (err)
return err;
if (func_id == BPF_FUNC_tail_call) {
if (meta.map_ptr == NULL) {
verbose(env, "verifier bug\n");
return -EINVAL;
}
env->insn_aux_data[insn_idx].map_ptr = meta.map_ptr;
}
err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &meta);
if (err)
return err;
@@ -4456,6 +4463,35 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
*/
insn->imm = 0;
insn->code = BPF_JMP | BPF_TAIL_CALL;

/* instead of changing every JIT dealing with tail_call
* emit two extra insns:
* if (index >= max_entries) goto out;
* index &= array->index_mask;
* to avoid out-of-bounds cpu speculation
*/
map_ptr = env->insn_aux_data[i + delta].map_ptr;
if (map_ptr == BPF_MAP_PTR_POISON) {
verbose(env, "tail_call abusing map_ptr\n");
return -EINVAL;
}
if (!map_ptr->unpriv_array)
continue;
insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
map_ptr->max_entries, 2);
insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
container_of(map_ptr,
struct bpf_array,
map)->index_mask);
insn_buf[2] = *insn;
cnt = 3;
new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
if (!new_prog)
return -ENOMEM;

delta += cnt - 1;
env->prog = prog = new_prog;
insn = new_prog->insnsi + i + delta;
continue;
}

