Skip to content

Commit

Permalink
bpf: sockmap with sk redirect support
Browse files Browse the repository at this point in the history
Recently we added a new map type called dev map used to forward XDP
packets between ports (6093ec2). This patches introduces a
similar notion for sockets.

A sockmap allows users to add participating sockets to a map. When
sockets are added to the map enough context is stored with the
map entry to use the entry with a new helper

  bpf_sk_redirect_map(map, key, flags)

This helper (analogous to bpf_redirect_map in XDP) is given the map
and an entry in the map. When called from a sockmap program, discussed
below, the skb will be sent on the socket using skb_send_sock().

With the above we need a bpf program to call the helper from that will
then implement the send logic. The initial site implemented in this
series is the recv_sock hook. For this to work we implemented a map
attach command to add attributes to a map. In sockmap we add two
programs a parse program and a verdict program. The parse program
uses strparser to build messages and pass them to the verdict program.
The parse programs use the normal strparser semantics. The verdict
program is of type SK_SKB.

The verdict program returns a verdict SK_DROP, or  SK_REDIRECT for
now. Additional actions may be added later. When SK_REDIRECT is
returned, expected when bpf program uses bpf_sk_redirect_map(), the
sockmap logic will consult per cpu variables set by the helper routine
and pull the sock entry out of the sock map. This pattern follows the
existing redirect logic in cls and xdp programs.

This gives the flow,

 recv_sock -> str_parser (parse_prog) -> verdict_prog -> skb_send_sock
                                                     \
                                                      -> kfree_skb

As an example use case a message based load balancer may use specific
logic in the verdict program to select the sock to send on.

Sample programs are provided in future patches that hopefully illustrate
the user interfaces. Also selftests are in follow-on patches.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
  • Loading branch information
jrfastab committed Aug 16, 2017
1 parent b1a600f commit c89fd73
Show file tree
Hide file tree
Showing 9 changed files with 940 additions and 5 deletions.
7 changes: 5 additions & 2 deletions include/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include <linux/rbtree_latch.h>

struct perf_event;
struct bpf_prog;
struct bpf_map;

/* map is generic key/value storage optionally accesible by eBPF programs */
Expand All @@ -37,6 +38,8 @@ struct bpf_map_ops {
void (*map_fd_put_ptr)(void *ptr);
u32 (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf);
u32 (*map_fd_sys_lookup_elem)(void *ptr);
int (*map_attach)(struct bpf_map *map,
struct bpf_prog *p1, struct bpf_prog *p2);
};

struct bpf_map {
Expand Down Expand Up @@ -138,8 +141,6 @@ enum bpf_reg_type {
PTR_TO_PACKET_END, /* skb->data + headlen */
};

struct bpf_prog;

/* The information passed from prog-specific *_is_valid_access
* back to the verifier.
*/
Expand Down Expand Up @@ -312,6 +313,7 @@ int bpf_check(struct bpf_prog **fp, union bpf_attr *attr);

/* Map specifics */
struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key);
void __dev_map_insert_ctx(struct bpf_map *map, u32 index);
void __dev_map_flush(struct bpf_map *map);

Expand Down Expand Up @@ -391,6 +393,7 @@ extern const struct bpf_func_proto bpf_get_current_comm_proto;
extern const struct bpf_func_proto bpf_skb_vlan_push_proto;
extern const struct bpf_func_proto bpf_skb_vlan_pop_proto;
extern const struct bpf_func_proto bpf_get_stackid_proto;
extern const struct bpf_func_proto bpf_sock_map_update_proto;

/* Shared helpers among cBPF and eBPF. */
void bpf_user_rnd_init_once(void);
Expand Down
1 change: 1 addition & 0 deletions include/linux/bpf_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,5 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops)
#ifdef CONFIG_NET
BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops)
#endif
2 changes: 2 additions & 0 deletions include/linux/filter.h
Original file line number Diff line number Diff line change
Expand Up @@ -727,6 +727,8 @@ void xdp_do_flush_map(void);
void bpf_warn_invalid_xdp_action(u32 act);
void bpf_warn_invalid_xdp_redirect(u32 ifindex);

struct sock *do_sk_redirect_map(void);

#ifdef CONFIG_BPF_JIT
extern int bpf_jit_enable;
extern int bpf_jit_harden;
Expand Down
33 changes: 32 additions & 1 deletion include/uapi/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ enum bpf_map_type {
BPF_MAP_TYPE_ARRAY_OF_MAPS,
BPF_MAP_TYPE_HASH_OF_MAPS,
BPF_MAP_TYPE_DEVMAP,
BPF_MAP_TYPE_SOCKMAP,
};

enum bpf_prog_type {
Expand All @@ -135,11 +136,15 @@ enum bpf_attach_type {
BPF_CGROUP_INET_EGRESS,
BPF_CGROUP_INET_SOCK_CREATE,
BPF_CGROUP_SOCK_OPS,
BPF_CGROUP_SMAP_INGRESS,
__MAX_BPF_ATTACH_TYPE
};

#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE

/* If BPF_SOCKMAP_STRPARSER is used sockmap will use strparser on receive */
#define BPF_SOCKMAP_STRPARSER (1U << 0)

/* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command
* to the given target_fd cgroup the descendent cgroup will be able to
* override effective bpf program that was inherited from this cgroup
Expand Down Expand Up @@ -211,6 +216,7 @@ union bpf_attr {
__u32 attach_bpf_fd; /* eBPF program to attach */
__u32 attach_type;
__u32 attach_flags;
__u32 attach_bpf_fd2;
};

struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */
Expand Down Expand Up @@ -557,6 +563,23 @@ union bpf_attr {
* @mode: operation mode (enum bpf_adj_room_mode)
* @flags: reserved for future use
* Return: 0 on success or negative error code
*
* int bpf_sk_redirect_map(map, key, flags)
* Redirect skb to a sock in map using key as a lookup key for the
* sock in map.
* @map: pointer to sockmap
* @key: key to lookup sock in map
* @flags: reserved for future use
* Return: SK_REDIRECT
*
* int bpf_sock_map_update(skops, map, key, flags, map_flags)
* @skops: pointer to bpf_sock_ops
* @map: pointer to sockmap to update
* @key: key to insert/update sock in map
* @flags: same flags as map update elem
* @map_flags: sock map specific flags
* bit 1: Enable strparser
* other bits: reserved
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
Expand Down Expand Up @@ -610,7 +633,9 @@ union bpf_attr {
FN(set_hash), \
FN(setsockopt), \
FN(skb_adjust_room), \
FN(redirect_map),
FN(redirect_map), \
FN(sk_redirect_map), \
FN(sock_map_update), \

/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
Expand Down Expand Up @@ -747,6 +772,12 @@ struct xdp_md {
__u32 data_end;
};

enum sk_action {
SK_ABORTED = 0,
SK_DROP,
SK_REDIRECT,
};

#define BPF_TAG_SIZE 8

struct bpf_prog_info {
Expand Down
2 changes: 1 addition & 1 deletion kernel/bpf/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ obj-y := core.o
obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o
obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
ifeq ($(CONFIG_NET),y)
obj-$(CONFIG_BPF_SYSCALL) += devmap.o
obj-$(CONFIG_BPF_SYSCALL) += devmap.o sockmap.o
endif
ifeq ($(CONFIG_PERF_EVENTS),y)
obj-$(CONFIG_BPF_SYSCALL) += stackmap.o
Expand Down
Loading

0 comments on commit c89fd73

Please sign in to comment.