Skip to content

Commit

Permalink
Merge remote-tracking branch 'remotes/jasowang/tags/net-pull-request'…
Browse files Browse the repository at this point in the history
… into staging

# gpg: Signature made Fri 04 Jun 2021 08:26:16 BST
# gpg:                using RSA key EF04965B398D6211
# gpg: Good signature from "Jason Wang (Jason Wang on RedHat) <jasowang@redhat.com>" [marginal]
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 215D 46F4 8246 689E C77F  3562 EF04 965B 398D 6211

* remotes/jasowang/tags/net-pull-request:
  MAINTAINERS: Added eBPF maintainers information.
  docs: Added eBPF documentation.
  virtio-net: Added eBPF RSS to virtio-net.
  ebpf: Added eBPF RSS loader.
  ebpf: Added eBPF RSS program.
  net: Added SetSteeringEBPF method for NetClientState.
  net/tap: Added TUNSETSTEERINGEBPF code.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
  • Loading branch information
pm215 committed Jun 4, 2021
2 parents 5a95f5c + 90322e6 commit 1cbd2d9
Show file tree
Hide file tree
Showing 27 changed files with 1,607 additions and 4 deletions.
8 changes: 8 additions & 0 deletions MAINTAINERS
Expand Up @@ -3316,6 +3316,14 @@ F: include/hw/remote/proxy-memory-listener.h
F: hw/remote/iohub.c
F: include/hw/remote/iohub.h

EBPF:
M: Jason Wang <jasowang@redhat.com>
R: Andrew Melnychenko <andrew@daynix.com>
R: Yuri Benditovich <yuri.benditovich@daynix.com>
S: Maintained
F: ebpf/*
F: tools/ebpf/*

Build and test automation
-------------------------
Build and test automation, general continuous integration
Expand Down
8 changes: 7 additions & 1 deletion configure
Expand Up @@ -328,6 +328,7 @@ vhost_vsock="$default_feature"
vhost_user="no"
vhost_user_blk_server="auto"
vhost_user_fs="$default_feature"
bpf="auto"
kvm="auto"
hax="auto"
hvf="auto"
Expand Down Expand Up @@ -1219,6 +1220,10 @@ for opt do
;;
--enable-membarrier) membarrier="yes"
;;
--disable-bpf) bpf="disabled"
;;
--enable-bpf) bpf="enabled"
;;
--disable-blobs) blobs="false"
;;
--with-pkgversion=*) pkgversion="$optarg"
Expand Down Expand Up @@ -1879,6 +1884,7 @@ disabled with --disable-FEATURE, default is enabled if available
vhost-user vhost-user backend support
vhost-user-blk-server vhost-user-blk server support
vhost-vdpa vhost-vdpa kernel backend support
bpf BPF kernel support
spice spice
spice-protocol spice-protocol
rbd rados block device (rbd)
Expand Down Expand Up @@ -6440,7 +6446,7 @@ if test "$skip_meson" = no; then
-Dattr=$attr -Ddefault_devices=$default_devices \
-Ddocs=$docs -Dsphinx_build=$sphinx_build -Dinstall_blobs=$blobs \
-Dvhost_user_blk_server=$vhost_user_blk_server -Dmultiprocess=$multiprocess \
-Dfuse=$fuse -Dfuse_lseek=$fuse_lseek -Dguest_agent_msi=$guest_agent_msi \
-Dfuse=$fuse -Dfuse_lseek=$fuse_lseek -Dguest_agent_msi=$guest_agent_msi -Dbpf=$bpf\
$(if test "$default_features" = no; then echo "-Dauto_features=disabled"; fi) \
-Dtcg_interpreter=$tcg_interpreter \
$cross_arg \
Expand Down
125 changes: 125 additions & 0 deletions docs/devel/ebpf_rss.rst
@@ -0,0 +1,125 @@
===========================
eBPF RSS virtio-net support
===========================

RSS(Receive Side Scaling) is used to distribute network packets to guest virtqueues
by calculating a packet hash. Usually every queue is then processed by a specific guest CPU core.

For now there are 2 RSS implementations in qemu:
- 'in-qemu' RSS (functions if qemu receives network packets, i.e. vhost=off)
- eBPF RSS (can also function with vhost=on)

eBPF support (CONFIG_EBPF) is enabled by 'configure' script.
To enable eBPF RSS support use './configure --enable-bpf'.

If steering BPF is not set for kernel's TUN module, the TUN uses automatic selection
of rx virtqueue based on lookup table built according to calculated symmetric hash
of transmitted packets.
If steering BPF is set for TUN the BPF code calculates the hash of packet header and
returns the virtqueue number to place the packet to.

Simplified decision formula:

.. code:: C

    queue_index = indirection_table[hash(<packet data>)%<indirection_table size>]

Not every packet can/should have its hash calculated.

Note: currently, eBPF RSS does not support hash reporting.

eBPF RSS is turned on by different combinations of vhost-net, virtio-net and tap configurations:

- eBPF is used:

tap,vhost=off & virtio-net-pci,rss=on,hash=off

- eBPF is used:

tap,vhost=on & virtio-net-pci,rss=on,hash=off

- 'in-qemu' RSS is used:

tap,vhost=off & virtio-net-pci,rss=on,hash=on

- eBPF is used, hash population feature is not reported to the guest:

tap,vhost=on & virtio-net-pci,rss=on,hash=on

If CONFIG_EBPF is not set then only 'in-qemu' RSS is supported.
Also, 'in-qemu' RSS is used as a fallback if the eBPF program fails to load or cannot be set for TUN.

RSS eBPF program
----------------

The RSS program is located in ebpf/rss.bpf.skeleton.h, which is generated by bpftool.
So the program is part of the qemu binary.
Initially, the eBPF program was compiled by clang and source code located at tools/ebpf/rss.bpf.c.
Prerequisites to recompile the eBPF program (regenerate ebpf/rss.bpf.skeleton.h):

llvm, clang, kernel source tree, bpftool
Adjust Makefile.ebpf to reflect the location of the kernel source tree

$ cd tools/ebpf
$ make -f Makefile.ebpf

The current eBPF RSS implementation uses 'bounded loops' with 'backward jump instructions', which are present in recent kernels.
Overall eBPF RSS works on kernels 5.8+.

eBPF RSS implementation
-----------------------

eBPF RSS loading functionality located in ebpf/ebpf_rss.c and ebpf/ebpf_rss.h.

The `struct EBPFRSSContext` structure holds a pointer to the libbpf context and 4 file descriptors:

- obj - pointer to the libbpf context.
- program_fd - file descriptor of the eBPF RSS program.
- map_configuration - file descriptor of the 'configuration' map. This map contains one element of 'struct EBPFRSSConfig'. This configuration determines eBPF program behavior.
- map_toeplitz_key - file descriptor of the 'Toeplitz key' map. One element of the 40byte key prepared for the hashing algorithm.
- map_indirections_table - file descriptor of the 'indirections table' map. 128 elements of queue indexes.

`struct EBPFRSSConfig` fields:

- redirect - "boolean" value, should the hash be calculated, on false - `default_queue` would be used as the final decision.
- populate_hash - for now, not used. eBPF RSS doesn't support hash reporting.
- hash_types - binary mask of different hash types. See `VIRTIO_NET_RSS_HASH_TYPE_*` defines. If the hash should not be calculated for a packet, `default_queue` would be used.
- indirections_len - length of the indirections table, maximum 128.
- default_queue - the queue index that is used for packets that shouldn't be hashed. For some packets, the hash can't be calculated (e.g. ARP).

Functions:

- `ebpf_rss_init()` - sets the context's `obj` pointer to NULL, which indicates that EBPFRSSContext is not loaded.
- `ebpf_rss_load()` - creates 3 maps and loads eBPF program from the rss.bpf.skeleton.h. Returns 'true' on success. After that, program_fd can be used to set steering for TAP.
- `ebpf_rss_set_all()` - sets values for eBPF maps. `indirections_table` length is in EBPFRSSConfig. `toeplitz_key` is VIRTIO_NET_RSS_MAX_KEY_SIZE aka 40 bytes array.
- `ebpf_rss_unload()` - destroys the loaded eBPF object (closing all file descriptors) and sets the context's `obj` pointer to NULL.

Simplified eBPF RSS workflow:

.. code:: C

    struct EBPFRSSConfig config;
    config.redirect = 1;
    config.hash_types = VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | VIRTIO_NET_RSS_HASH_TYPE_TCPv4;
    config.indirections_len = VIRTIO_NET_RSS_MAX_TABLE_LEN;
    config.default_queue = 0;

    uint16_t table[VIRTIO_NET_RSS_MAX_TABLE_LEN] = {...};
    uint8_t key[VIRTIO_NET_RSS_MAX_KEY_SIZE] = {...};

    struct EBPFRSSContext ctx;
    ebpf_rss_init(&ctx);
    ebpf_rss_load(&ctx);
    ebpf_rss_set_all(&ctx, &config, table, key);
    if (net_client->info->set_steering_ebpf != NULL) {
        net_client->info->set_steering_ebpf(net_client, ctx.program_fd);
    }
    ...
    ebpf_rss_unload(&ctx);

NetClientState SetSteeringEBPF()
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

For now, the `set_steering_ebpf()` method is supported only by the Linux TAP NetClientState. The method requires an eBPF program file descriptor as an argument.
1 change: 1 addition & 0 deletions docs/devel/index.rst
Expand Up @@ -43,3 +43,4 @@ Contents:
qom
block-coroutine-wrapper
multi-process
ebpf_rss
40 changes: 40 additions & 0 deletions ebpf/ebpf_rss-stub.c
@@ -0,0 +1,40 @@
/*
* eBPF RSS stub file
*
* Developed by Daynix Computing LTD (http://www.daynix.com)
*
* Authors:
* Yuri Benditovich <yuri.benditovich@daynix.com>
*
* This work is licensed under the terms of the GNU GPL, version 2. See
* the COPYING file in the top-level directory.
*/

#include "qemu/osdep.h"
#include "ebpf/ebpf_rss.h"

/*
 * Stub variant of ebpf_rss_init(): there is no state to set up,
 * so @ctx is left untouched.
 */
void ebpf_rss_init(struct EBPFRSSContext *ctx)
{
    /* intentionally empty */
}

/*
 * Stub variant of ebpf_rss_is_loaded(): nothing can ever be loaded,
 * so the answer is always "no" regardless of @ctx.
 */
bool ebpf_rss_is_loaded(struct EBPFRSSContext *ctx)
{
    const bool loaded = false;

    return loaded;
}

/*
 * Stub variant of ebpf_rss_load(): loading is not available here,
 * so the call always fails and @ctx is left untouched.
 *
 * Returns: false, unconditionally.
 */
bool ebpf_rss_load(struct EBPFRSSContext *ctx)
{
    const bool loaded = false;

    return loaded;
}

/*
 * Stub variant of ebpf_rss_set_all(): no maps exist to be updated,
 * so every argument is ignored and the call always fails.
 *
 * Returns: false, unconditionally.
 */
bool ebpf_rss_set_all(struct EBPFRSSContext *ctx, struct EBPFRSSConfig *config,
                      uint16_t *indirections_table, uint8_t *toeplitz_key)
{
    const bool applied = false;

    return applied;
}

/*
 * Stub variant of ebpf_rss_unload(): nothing was loaded, so there is
 * nothing to release and @ctx is left untouched.
 */
void ebpf_rss_unload(struct EBPFRSSContext *ctx)
{
    /* intentionally empty */
}
165 changes: 165 additions & 0 deletions ebpf/ebpf_rss.c
@@ -0,0 +1,165 @@
/*
* eBPF RSS loader
*
* Developed by Daynix Computing LTD (http://www.daynix.com)
*
* Authors:
* Andrew Melnychenko <andrew@daynix.com>
* Yuri Benditovich <yuri.benditovich@daynix.com>
*
* This work is licensed under the terms of the GNU GPL, version 2. See
* the COPYING file in the top-level directory.
*/

#include "qemu/osdep.h"
#include "qemu/error-report.h"

#include <bpf/libbpf.h>
#include <bpf/bpf.h>

#include "hw/virtio/virtio-net.h" /* VIRTIO_NET_RSS_MAX_TABLE_LEN */

#include "ebpf/ebpf_rss.h"
#include "ebpf/rss.bpf.skeleton.h"
#include "trace.h"

/*
 * Put @ctx into the "not loaded" state by clearing its object pointer.
 * A NULL @ctx is silently ignored.
 */
void ebpf_rss_init(struct EBPFRSSContext *ctx)
{
    if (!ctx) {
        return;
    }

    ctx->obj = NULL;
}

/*
 * Report whether @ctx currently holds a loaded eBPF object.
 *
 * Returns: true only when @ctx is non-NULL and its object pointer is set.
 */
bool ebpf_rss_is_loaded(struct EBPFRSSContext *ctx)
{
    if (!ctx) {
        return false;
    }

    return ctx->obj != NULL;
}

/*
 * Open and load the embedded RSS eBPF skeleton and record its handles
 * in @ctx.
 *
 * On success @ctx->obj points at the skeleton and the program and map
 * file descriptors (configuration, indirection table, Toeplitz key)
 * are stored in @ctx.  On failure the skeleton is destroyed and
 * @ctx->obj is reset to NULL.
 *
 * Returns: true on success, false if @ctx is NULL or open/load failed.
 */
bool ebpf_rss_load(struct EBPFRSSContext *ctx)
{
    struct rss_bpf *skel;

    if (!ctx) {
        return false;
    }

    skel = rss_bpf__open();
    if (!skel) {
        trace_ebpf_error("eBPF RSS", "can not open eBPF RSS object");
        goto fail;
    }

    /* The program type has to be fixed before the object is loaded. */
    bpf_program__set_socket_filter(skel->progs.tun_rss_steering_prog);

    if (rss_bpf__load(skel) != 0) {
        trace_ebpf_error("eBPF RSS", "can not load RSS program");
        goto fail;
    }

    ctx->obj = skel;
    ctx->program_fd =
        bpf_program__fd(skel->progs.tun_rss_steering_prog);
    ctx->map_configuration =
        bpf_map__fd(skel->maps.tap_rss_map_configurations);
    ctx->map_indirections_table =
        bpf_map__fd(skel->maps.tap_rss_map_indirection_table);
    ctx->map_toeplitz_key =
        bpf_map__fd(skel->maps.tap_rss_map_toeplitz_key);

    return true;

fail:
    /* Also reached with skel == NULL when the open step failed. */
    rss_bpf__destroy(skel);
    ctx->obj = NULL;

    return false;
}

/*
 * Write @config into element 0 of the configuration map of @ctx.
 *
 * Returns: true if the map update succeeded, false if @ctx is not
 * loaded or the update reported an error.
 */
static bool ebpf_rss_set_config(struct EBPFRSSContext *ctx,
                                struct EBPFRSSConfig *config)
{
    uint32_t slot = 0;

    if (!ebpf_rss_is_loaded(ctx)) {
        return false;
    }

    return bpf_map_update_elem(ctx->map_configuration, &slot, config, 0) >= 0;
}

/*
 * Copy the first @len entries of @indirections_table into the
 * indirection-table map of @ctx, one map element per entry, keyed by
 * the entry's index.
 *
 * Returns: true when every entry was written; false if @ctx is not
 * loaded, @indirections_table is NULL, @len exceeds
 * VIRTIO_NET_RSS_MAX_TABLE_LEN, or any map update fails (entries
 * written before the failure stay in the map).
 */
static bool ebpf_rss_set_indirections_table(struct EBPFRSSContext *ctx,
                                            uint16_t *indirections_table,
                                            size_t len)
{
    uint32_t idx;

    if (!ebpf_rss_is_loaded(ctx)) {
        return false;
    }
    if (indirections_table == NULL) {
        return false;
    }
    if (len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
        return false;
    }

    idx = 0;
    while (idx < len) {
        if (bpf_map_update_elem(ctx->map_indirections_table, &idx,
                                &indirections_table[idx], 0) < 0) {
            return false;
        }
        idx++;
    }

    return true;
}

/*
 * Store the Toeplitz hash key in element 0 of the key map of @ctx.
 *
 * @toeplitz_key must point at VIRTIO_NET_RSS_MAX_KEY_SIZE bytes.  The
 * key is copied to a local buffer and its first 32-bit word is
 * converted with ntohl() before being written to the map; the rest of
 * the key is passed through unchanged.
 * NOTE(review): presumably the eBPF program expects the leading word in
 * host byte order - confirm against tools/ebpf/rss.bpf.c.
 *
 * Returns: true if the map update succeeded, false if @ctx is not
 * loaded, @toeplitz_key is NULL, or the update reported an error.
 */
static bool ebpf_rss_set_toepliz_key(struct EBPFRSSContext *ctx,
                                     uint8_t *toeplitz_key)
{
    uint32_t map_key = 0;
    uint32_t first_word;

    /* prepare toeplitz key */
    uint8_t toe[VIRTIO_NET_RSS_MAX_KEY_SIZE] = {};

    if (!ebpf_rss_is_loaded(ctx) || toeplitz_key == NULL) {
        return false;
    }
    memcpy(toe, toeplitz_key, VIRTIO_NET_RSS_MAX_KEY_SIZE);

    /*
     * Byte-swap the leading word through a properly typed temporary:
     * dereferencing the uint8_t buffer as uint32_t would violate the
     * aliasing rules and may be a misaligned access.
     */
    memcpy(&first_word, toe, sizeof(first_word));
    first_word = ntohl(first_word);
    memcpy(toe, &first_word, sizeof(first_word));

    if (bpf_map_update_elem(ctx->map_toeplitz_key, &map_key, toe,
                            0) < 0) {
        return false;
    }
    return true;
}

/*
 * Push the complete RSS state to the maps of @ctx: first the
 * configuration, then the indirection table (config->indirections_len
 * entries), then the Toeplitz key.
 *
 * The steps run in that order and stop at the first failure, so an
 * earlier step may already have been applied when false is returned.
 *
 * Returns: true if all three updates succeeded; false if @ctx is not
 * loaded, any pointer argument is NULL, or any step failed.
 */
bool ebpf_rss_set_all(struct EBPFRSSContext *ctx, struct EBPFRSSConfig *config,
                      uint16_t *indirections_table, uint8_t *toeplitz_key)
{
    if (!ebpf_rss_is_loaded(ctx) || config == NULL ||
        indirections_table == NULL || toeplitz_key == NULL) {
        return false;
    }

    return ebpf_rss_set_config(ctx, config) &&
           ebpf_rss_set_indirections_table(ctx, indirections_table,
                                           config->indirections_len) &&
           ebpf_rss_set_toepliz_key(ctx, toeplitz_key);
}

/*
 * Destroy the eBPF object held by @ctx, if any, and mark @ctx as not
 * loaded.  Does nothing when @ctx is NULL or nothing is loaded.
 */
void ebpf_rss_unload(struct EBPFRSSContext *ctx)
{
    if (ebpf_rss_is_loaded(ctx)) {
        rss_bpf__destroy(ctx->obj);
        ctx->obj = NULL;
    }
}

0 comments on commit 1cbd2d9

Please sign in to comment.