Skip to content

Commit

Permalink
bpf: support cloning sk storage on accept()
Browse files Browse the repository at this point in the history
Add new helper bpf_sk_storage_clone which optionally clones sk storage
and call it from sk_clone_lock.

Cc: Martin KaFai Lau <kafai@fb.com>
Cc: Yonghong Song <yhs@fb.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
  • Loading branch information
fomichev authored and borkmann committed Aug 17, 2019
1 parent b0e4701 commit 8f51dfc
Show file tree
Hide file tree
Showing 4 changed files with 120 additions and 6 deletions.
10 changes: 10 additions & 0 deletions include/net/bpf_sk_storage.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,14 @@ void bpf_sk_storage_free(struct sock *sk);
extern const struct bpf_func_proto bpf_sk_storage_get_proto;
extern const struct bpf_func_proto bpf_sk_storage_delete_proto;

#ifdef CONFIG_BPF_SYSCALL
int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk);
#else
static inline int bpf_sk_storage_clone(const struct sock *sk,
struct sock *newsk)
{
return 0;
}
#endif

#endif /* _BPF_SK_STORAGE_H */
3 changes: 3 additions & 0 deletions include/uapi/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,9 @@ enum bpf_attach_type {
#define BPF_F_RDONLY_PROG (1U << 7)
#define BPF_F_WRONLY_PROG (1U << 8)

/* Clone map from listener for newly accepted socket */
#define BPF_F_CLONE (1U << 9)

/* flags for BPF_PROG_QUERY */
#define BPF_F_QUERY_EFFECTIVE (1U << 0)

Expand Down
104 changes: 101 additions & 3 deletions net/core/bpf_sk_storage.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@

static atomic_t cache_idx;

#define SK_STORAGE_CREATE_FLAG_MASK \
(BPF_F_NO_PREALLOC | BPF_F_CLONE)

struct bucket {
struct hlist_head list;
raw_spinlock_t lock;
Expand Down Expand Up @@ -209,7 +212,6 @@ static void selem_unlink_sk(struct bpf_sk_storage_elem *selem)
kfree_rcu(sk_storage, rcu);
}

/* sk_storage->lock must be held and sk_storage->list cannot be empty */
static void __selem_link_sk(struct bpf_sk_storage *sk_storage,
struct bpf_sk_storage_elem *selem)
{
Expand Down Expand Up @@ -509,7 +511,7 @@ static int sk_storage_delete(struct sock *sk, struct bpf_map *map)
return 0;
}

/* Called by __sk_destruct() */
/* Called by __sk_destruct() & bpf_sk_storage_clone() */
void bpf_sk_storage_free(struct sock *sk)
{
struct bpf_sk_storage_elem *selem;
Expand Down Expand Up @@ -557,6 +559,11 @@ static void bpf_sk_storage_map_free(struct bpf_map *map)

smap = (struct bpf_sk_storage_map *)map;

/* Note that this map might be concurrently cloned from
* bpf_sk_storage_clone. Wait for any existing bpf_sk_storage_clone
* RCU read section to finish before proceeding. New RCU
* read sections should be prevented via bpf_map_inc_not_zero.
*/
synchronize_rcu();

/* bpf prog and the userspace can no longer access this map
Expand Down Expand Up @@ -601,7 +608,9 @@ static void bpf_sk_storage_map_free(struct bpf_map *map)

static int bpf_sk_storage_map_alloc_check(union bpf_attr *attr)
{
if (attr->map_flags != BPF_F_NO_PREALLOC || attr->max_entries ||
if (attr->map_flags & ~SK_STORAGE_CREATE_FLAG_MASK ||
!(attr->map_flags & BPF_F_NO_PREALLOC) ||
attr->max_entries ||
attr->key_size != sizeof(int) || !attr->value_size ||
/* Enforce BTF for userspace sk dumping */
!attr->btf_key_type_id || !attr->btf_value_type_id)
Expand Down Expand Up @@ -739,6 +748,95 @@ static int bpf_fd_sk_storage_delete_elem(struct bpf_map *map, void *key)
return err;
}

static struct bpf_sk_storage_elem *
bpf_sk_storage_clone_elem(struct sock *newsk,
struct bpf_sk_storage_map *smap,
struct bpf_sk_storage_elem *selem)
{
struct bpf_sk_storage_elem *copy_selem;

copy_selem = selem_alloc(smap, newsk, NULL, true);
if (!copy_selem)
return NULL;

if (map_value_has_spin_lock(&smap->map))
copy_map_value_locked(&smap->map, SDATA(copy_selem)->data,
SDATA(selem)->data, true);
else
copy_map_value(&smap->map, SDATA(copy_selem)->data,
SDATA(selem)->data);

return copy_selem;
}

int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
{
struct bpf_sk_storage *new_sk_storage = NULL;
struct bpf_sk_storage *sk_storage;
struct bpf_sk_storage_elem *selem;
int ret = 0;

RCU_INIT_POINTER(newsk->sk_bpf_storage, NULL);

rcu_read_lock();
sk_storage = rcu_dereference(sk->sk_bpf_storage);

if (!sk_storage || hlist_empty(&sk_storage->list))
goto out;

hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) {
struct bpf_sk_storage_elem *copy_selem;
struct bpf_sk_storage_map *smap;
struct bpf_map *map;

smap = rcu_dereference(SDATA(selem)->smap);
if (!(smap->map.map_flags & BPF_F_CLONE))
continue;

/* Note that for lockless listeners adding new element
* here can race with cleanup in bpf_sk_storage_map_free.
* Try to grab map refcnt to make sure that it's still
* alive and prevent concurrent removal.
*/
map = bpf_map_inc_not_zero(&smap->map, false);
if (IS_ERR(map))
continue;

copy_selem = bpf_sk_storage_clone_elem(newsk, smap, selem);
if (!copy_selem) {
ret = -ENOMEM;
bpf_map_put(map);
goto out;
}

if (new_sk_storage) {
selem_link_map(smap, copy_selem);
__selem_link_sk(new_sk_storage, copy_selem);
} else {
ret = sk_storage_alloc(newsk, smap, copy_selem);
if (ret) {
kfree(copy_selem);
atomic_sub(smap->elem_size,
&newsk->sk_omem_alloc);
bpf_map_put(map);
goto out;
}

new_sk_storage = rcu_dereference(copy_selem->sk_storage);
}
bpf_map_put(map);
}

out:
rcu_read_unlock();

/* In case of an error, don't free anything explicitly here, the
* caller is responsible to call bpf_sk_storage_free.
*/

return ret;
}

BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
void *, value, u64, flags)
{
Expand Down
9 changes: 6 additions & 3 deletions net/core/sock.c
Original file line number Diff line number Diff line change
Expand Up @@ -1851,9 +1851,12 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
goto out;
}
RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);
#ifdef CONFIG_BPF_SYSCALL
RCU_INIT_POINTER(newsk->sk_bpf_storage, NULL);
#endif

if (bpf_sk_storage_clone(sk, newsk)) {
sk_free_unlock_clone(newsk);
newsk = NULL;
goto out;
}

newsk->sk_err = 0;
newsk->sk_err_soft = 0;
Expand Down

0 comments on commit 8f51dfc

Please sign in to comment.