Skip to content

Commit

Permalink
bpf: Implement CAP_BPF
Browse files Browse the repository at this point in the history
Implement permissions as stated in uapi/linux/capability.h
In order to do that the verifier allow_ptr_leaks flag is split
into four flags and they are set as:
  env->allow_ptr_leaks = bpf_allow_ptr_leaks();
  env->bypass_spec_v1 = bpf_bypass_spec_v1();
  env->bypass_spec_v4 = bpf_bypass_spec_v4();
  env->bpf_capable = bpf_capable();

The first three currently equivalent to perfmon_capable(), since leaking kernel
pointers and reading kernel memory via side channel attacks is roughly
equivalent to reading kernel memory with cap_perfmon.

'bpf_capable' enables bounded loops, precision tracking, bpf to bpf calls and
other verifier features. 'allow_ptr_leaks' enable ptr leaks, ptr conversions,
subtraction of pointers. 'bypass_spec_v1' disables speculative analysis in the
verifier, run time mitigations in bpf array, and enables indirect variable
access in bpf programs. 'bypass_spec_v4' disables emission of sanitation code
by the verifier.

That means that the networking BPF program loaded with CAP_BPF + CAP_NET_ADMIN
will have speculative checks done by the verifier and other spectre mitigation
applied. Such networking BPF program will not be able to leak kernel pointers
and will not be able to access arbitrary kernel memory.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200513230355.7858-3-alexei.starovoitov@gmail.com
  • Loading branch information
Alexei Starovoitov authored and borkmann committed May 15, 2020
1 parent a17b53c commit 2c78ee8
Show file tree
Hide file tree
Showing 19 changed files with 134 additions and 60 deletions.
2 changes: 1 addition & 1 deletion drivers/media/rc/bpf-lirc.c
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ lirc_mode2_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_get_prandom_u32:
return &bpf_get_prandom_u32_proto;
case BPF_FUNC_trace_printk:
if (capable(CAP_SYS_ADMIN))
if (perfmon_capable())
return bpf_get_trace_printk_proto();
/* fall through */
default:
Expand Down
18 changes: 17 additions & 1 deletion include/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <linux/mutex.h>
#include <linux/module.h>
#include <linux/kallsyms.h>
#include <linux/capability.h>

struct bpf_verifier_env;
struct bpf_verifier_log;
Expand Down Expand Up @@ -119,7 +120,7 @@ struct bpf_map {
struct bpf_map_memory memory;
char name[BPF_OBJ_NAME_LEN];
u32 btf_vmlinux_value_type_id;
bool unpriv_array;
bool bypass_spec_v1;
bool frozen; /* write-once; write-protected by freeze_mutex */
/* 22 bytes hole */

Expand Down Expand Up @@ -1095,6 +1096,21 @@ struct bpf_map *bpf_map_get_curr_or_next(u32 *id);

extern int sysctl_unprivileged_bpf_disabled;

static inline bool bpf_allow_ptr_leaks(void)
{
return perfmon_capable();
}

static inline bool bpf_bypass_spec_v1(void)
{
return perfmon_capable();
}

static inline bool bpf_bypass_spec_v4(void)
{
return perfmon_capable();
}

int bpf_map_new_fd(struct bpf_map *map, int flags);
int bpf_prog_new_fd(struct bpf_prog *prog);

Expand Down
3 changes: 3 additions & 0 deletions include/linux/bpf_verifier.h
Original file line number Diff line number Diff line change
Expand Up @@ -375,6 +375,9 @@ struct bpf_verifier_env {
u32 used_map_cnt; /* number of used maps */
u32 id_gen; /* used to generate unique reg IDs */
bool allow_ptr_leaks;
bool bpf_capable;
bool bypass_spec_v1;
bool bypass_spec_v4;
bool seen_direct_write;
struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */
const struct bpf_line_info *prev_linfo;
Expand Down
10 changes: 5 additions & 5 deletions kernel/bpf/arraymap.c
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
int ret, numa_node = bpf_map_attr_numa_node(attr);
u32 elem_size, index_mask, max_entries;
bool unpriv = !capable(CAP_SYS_ADMIN);
bool bypass_spec_v1 = bpf_bypass_spec_v1();
u64 cost, array_size, mask64;
struct bpf_map_memory mem;
struct bpf_array *array;
Expand All @@ -95,7 +95,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
mask64 -= 1;

index_mask = mask64;
if (unpriv) {
if (!bypass_spec_v1) {
/* round up array size to nearest power of 2,
* since cpu will speculate within index_mask limits
*/
Expand Down Expand Up @@ -149,7 +149,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
return ERR_PTR(-ENOMEM);
}
array->index_mask = index_mask;
array->map.unpriv_array = unpriv;
array->map.bypass_spec_v1 = bypass_spec_v1;

/* copy mandatory map attributes */
bpf_map_init_from_attr(&array->map, attr);
Expand Down Expand Up @@ -219,7 +219,7 @@ static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)

*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
if (map->unpriv_array) {
if (!map->bypass_spec_v1) {
*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 4);
*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
} else {
Expand Down Expand Up @@ -1053,7 +1053,7 @@ static u32 array_of_map_gen_lookup(struct bpf_map *map,

*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
if (map->unpriv_array) {
if (!map->bypass_spec_v1) {
*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 6);
*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
} else {
Expand Down
2 changes: 1 addition & 1 deletion kernel/bpf/bpf_struct_ops.c
Original file line number Diff line number Diff line change
Expand Up @@ -557,7 +557,7 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
struct bpf_map *map;
int err;

if (!capable(CAP_SYS_ADMIN))
if (!bpf_capable())
return ERR_PTR(-EPERM);

st_ops = bpf_struct_ops_find_value(attr->btf_vmlinux_value_type_id);
Expand Down
2 changes: 1 addition & 1 deletion kernel/bpf/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -646,7 +646,7 @@ static bool bpf_prog_kallsyms_verify_off(const struct bpf_prog *fp)
void bpf_prog_kallsyms_add(struct bpf_prog *fp)
{
if (!bpf_prog_kallsyms_candidate(fp) ||
!capable(CAP_SYS_ADMIN))
!bpf_capable())
return;

bpf_prog_ksym_set_addr(fp);
Expand Down
2 changes: 1 addition & 1 deletion kernel/bpf/cpumap.c
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
u64 cost;
int ret;

if (!capable(CAP_SYS_ADMIN))
if (!bpf_capable())
return ERR_PTR(-EPERM);

/* check sanity of attributes */
Expand Down
4 changes: 2 additions & 2 deletions kernel/bpf/hashtab.c
Original file line number Diff line number Diff line change
Expand Up @@ -359,9 +359,9 @@ static int htab_map_alloc_check(union bpf_attr *attr)
BUILD_BUG_ON(offsetof(struct htab_elem, fnode.next) !=
offsetof(struct htab_elem, hash_node.pprev));

if (lru && !capable(CAP_SYS_ADMIN))
if (lru && !bpf_capable())
/* LRU implementation is much complicated than other
* maps. Hence, limit to CAP_SYS_ADMIN for now.
* maps. Hence, limit to CAP_BPF.
*/
return -EPERM;

Expand Down
4 changes: 3 additions & 1 deletion kernel/bpf/helpers.c
Original file line number Diff line number Diff line change
Expand Up @@ -633,7 +633,7 @@ bpf_base_func_proto(enum bpf_func_id func_id)
break;
}

if (!capable(CAP_SYS_ADMIN))
if (!bpf_capable())
return NULL;

switch (func_id) {
Expand All @@ -642,6 +642,8 @@ bpf_base_func_proto(enum bpf_func_id func_id)
case BPF_FUNC_spin_unlock:
return &bpf_spin_unlock_proto;
case BPF_FUNC_trace_printk:
if (!perfmon_capable())
return NULL;
return bpf_get_trace_printk_proto();
case BPF_FUNC_jiffies64:
return &bpf_jiffies64_proto;
Expand Down
2 changes: 1 addition & 1 deletion kernel/bpf/lpm_trie.c
Original file line number Diff line number Diff line change
Expand Up @@ -543,7 +543,7 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr)
u64 cost = sizeof(*trie), cost_per_node;
int ret;

if (!capable(CAP_SYS_ADMIN))
if (!bpf_capable())
return ERR_PTR(-EPERM);

/* check sanity of attributes */
Expand Down
2 changes: 1 addition & 1 deletion kernel/bpf/map_in_map.c
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
/* Misc members not needed in bpf_map_meta_equal() check. */
inner_map_meta->ops = inner_map->ops;
if (inner_map->ops == &array_map_ops) {
inner_map_meta->unpriv_array = inner_map->unpriv_array;
inner_map_meta->bypass_spec_v1 = inner_map->bypass_spec_v1;
container_of(inner_map_meta, struct bpf_array, map)->index_mask =
container_of(inner_map, struct bpf_array, map)->index_mask;
}
Expand Down
2 changes: 1 addition & 1 deletion kernel/bpf/queue_stack_maps.c
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ static bool queue_stack_map_is_full(struct bpf_queue_stack *qs)
/* Called from syscall */
static int queue_stack_map_alloc_check(union bpf_attr *attr)
{
if (!capable(CAP_SYS_ADMIN))
if (!bpf_capable())
return -EPERM;

/* check sanity of attributes */
Expand Down
2 changes: 1 addition & 1 deletion kernel/bpf/reuseport_array.c
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ static struct bpf_map *reuseport_array_alloc(union bpf_attr *attr)
struct bpf_map_memory mem;
u64 array_size;

if (!capable(CAP_SYS_ADMIN))
if (!bpf_capable())
return ERR_PTR(-EPERM);

array_size = sizeof(*array);
Expand Down
2 changes: 1 addition & 1 deletion kernel/bpf/stackmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
u64 cost, n_buckets;
int err;

if (!capable(CAP_SYS_ADMIN))
if (!bpf_capable())
return ERR_PTR(-EPERM);

if (attr->map_flags & ~STACK_CREATE_FLAG_MASK)
Expand Down
89 changes: 68 additions & 21 deletions kernel/bpf/syscall.c
Original file line number Diff line number Diff line change
Expand Up @@ -1534,7 +1534,7 @@ static int map_freeze(const union bpf_attr *attr)
err = -EBUSY;
goto err_put;
}
if (!capable(CAP_SYS_ADMIN)) {
if (!bpf_capable()) {
err = -EPERM;
goto err_put;
}
Expand Down Expand Up @@ -2009,6 +2009,55 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
}
}

static bool is_net_admin_prog_type(enum bpf_prog_type prog_type)
{
switch (prog_type) {
case BPF_PROG_TYPE_SCHED_CLS:
case BPF_PROG_TYPE_SCHED_ACT:
case BPF_PROG_TYPE_XDP:
case BPF_PROG_TYPE_LWT_IN:
case BPF_PROG_TYPE_LWT_OUT:
case BPF_PROG_TYPE_LWT_XMIT:
case BPF_PROG_TYPE_LWT_SEG6LOCAL:
case BPF_PROG_TYPE_SK_SKB:
case BPF_PROG_TYPE_SK_MSG:
case BPF_PROG_TYPE_LIRC_MODE2:
case BPF_PROG_TYPE_FLOW_DISSECTOR:
case BPF_PROG_TYPE_CGROUP_DEVICE:
case BPF_PROG_TYPE_CGROUP_SOCK:
case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
case BPF_PROG_TYPE_CGROUP_SOCKOPT:
case BPF_PROG_TYPE_CGROUP_SYSCTL:
case BPF_PROG_TYPE_SOCK_OPS:
case BPF_PROG_TYPE_EXT: /* extends any prog */
return true;
case BPF_PROG_TYPE_CGROUP_SKB:
/* always unpriv */
case BPF_PROG_TYPE_SK_REUSEPORT:
/* equivalent to SOCKET_FILTER. need CAP_BPF only */
default:
return false;
}
}

static bool is_perfmon_prog_type(enum bpf_prog_type prog_type)
{
switch (prog_type) {
case BPF_PROG_TYPE_KPROBE:
case BPF_PROG_TYPE_TRACEPOINT:
case BPF_PROG_TYPE_PERF_EVENT:
case BPF_PROG_TYPE_RAW_TRACEPOINT:
case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
case BPF_PROG_TYPE_TRACING:
case BPF_PROG_TYPE_LSM:
case BPF_PROG_TYPE_STRUCT_OPS: /* has access to struct sock */
case BPF_PROG_TYPE_EXT: /* extends any prog */
return true;
default:
return false;
}
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_PROG_LOAD_LAST_FIELD attach_prog_fd

Expand All @@ -2031,7 +2080,7 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)

if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) &&
(attr->prog_flags & BPF_F_ANY_ALIGNMENT) &&
!capable(CAP_SYS_ADMIN))
!bpf_capable())
return -EPERM;

/* copy eBPF program license from user space */
Expand All @@ -2044,11 +2093,16 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
is_gpl = license_is_gpl_compatible(license);

if (attr->insn_cnt == 0 ||
attr->insn_cnt > (capable(CAP_SYS_ADMIN) ? BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS))
attr->insn_cnt > (bpf_capable() ? BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS))
return -E2BIG;
if (type != BPF_PROG_TYPE_SOCKET_FILTER &&
type != BPF_PROG_TYPE_CGROUP_SKB &&
!capable(CAP_SYS_ADMIN))
!bpf_capable())
return -EPERM;

if (is_net_admin_prog_type(type) && !capable(CAP_NET_ADMIN))
return -EPERM;
if (is_perfmon_prog_type(type) && !perfmon_capable())
return -EPERM;

bpf_prog_load_fixup_attach_type(attr);
Expand Down Expand Up @@ -2682,6 +2736,11 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
case BPF_PROG_TYPE_CGROUP_SOCKOPT:
return attach_type == prog->expected_attach_type ? 0 : -EINVAL;
case BPF_PROG_TYPE_CGROUP_SKB:
if (!capable(CAP_NET_ADMIN))
/* cg-skb progs can be loaded by unpriv user.
* check permissions at attach time.
*/
return -EPERM;
return prog->enforce_expected_attach_type &&
prog->expected_attach_type != attach_type ?
-EINVAL : 0;
Expand Down Expand Up @@ -2747,9 +2806,6 @@ static int bpf_prog_attach(const union bpf_attr *attr)
struct bpf_prog *prog;
int ret;

if (!capable(CAP_NET_ADMIN))
return -EPERM;

if (CHECK_ATTR(BPF_PROG_ATTACH))
return -EINVAL;

Expand Down Expand Up @@ -2804,9 +2860,6 @@ static int bpf_prog_detach(const union bpf_attr *attr)
{
enum bpf_prog_type ptype;

if (!capable(CAP_NET_ADMIN))
return -EPERM;

if (CHECK_ATTR(BPF_PROG_DETACH))
return -EINVAL;

Expand All @@ -2819,6 +2872,8 @@ static int bpf_prog_detach(const union bpf_attr *attr)
case BPF_PROG_TYPE_LIRC_MODE2:
return lirc_prog_detach(attr);
case BPF_PROG_TYPE_FLOW_DISSECTOR:
if (!capable(CAP_NET_ADMIN))
return -EPERM;
return skb_flow_dissector_bpf_prog_detach(attr);
case BPF_PROG_TYPE_CGROUP_DEVICE:
case BPF_PROG_TYPE_CGROUP_SKB:
Expand Down Expand Up @@ -2882,8 +2937,6 @@ static int bpf_prog_test_run(const union bpf_attr *attr,
struct bpf_prog *prog;
int ret = -ENOTSUPP;

if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (CHECK_ATTR(BPF_PROG_TEST_RUN))
return -EINVAL;

Expand Down Expand Up @@ -3184,7 +3237,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
info.run_time_ns = stats.nsecs;
info.run_cnt = stats.cnt;

if (!capable(CAP_SYS_ADMIN)) {
if (!bpf_capable()) {
info.jited_prog_len = 0;
info.xlated_prog_len = 0;
info.nr_jited_ksyms = 0;
Expand Down Expand Up @@ -3543,7 +3596,7 @@ static int bpf_btf_load(const union bpf_attr *attr)
if (CHECK_ATTR(BPF_BTF_LOAD))
return -EINVAL;

if (!capable(CAP_SYS_ADMIN))
if (!bpf_capable())
return -EPERM;

return btf_new_fd(attr);
Expand Down Expand Up @@ -3766,9 +3819,6 @@ static int link_create(union bpf_attr *attr)
struct bpf_prog *prog;
int ret;

if (!capable(CAP_NET_ADMIN))
return -EPERM;

if (CHECK_ATTR(BPF_LINK_CREATE))
return -EINVAL;

Expand Down Expand Up @@ -3817,9 +3867,6 @@ static int link_update(union bpf_attr *attr)
u32 flags;
int ret;

if (!capable(CAP_NET_ADMIN))
return -EPERM;

if (CHECK_ATTR(BPF_LINK_UPDATE))
return -EINVAL;

Expand Down Expand Up @@ -3988,7 +4035,7 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
union bpf_attr attr;
int err;

if (sysctl_unprivileged_bpf_disabled && !capable(CAP_SYS_ADMIN))
if (sysctl_unprivileged_bpf_disabled && !bpf_capable())
return -EPERM;

err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size);
Expand Down

0 comments on commit 2c78ee8

Please sign in to comment.