Skip to content

Commit

Permalink
netdev-afxdp: Add need_wakeup supprt.
Browse files Browse the repository at this point in the history
The patch adds support for using need_wakeup flag in AF_XDP rings.
A new option, use_need_wakeup, is added. When this option is used,
it means that OVS has to explicitly wake up the kernel RX, using poll()
syscall and wake up TX, using sendto() syscall. This feature improves
the performance by avoiding unnecessary sendto syscalls for TX.
For RX, instead of kernel always busy-spinning on fille queue, OVS wakes
up the kernel RX processing when fill queue is replenished.

The need_wakeup feature is merged into Linux kernel 5.3.0-rc1 and OVS
enables it by default. Running the feature before this version causes
xsk bind fails, please use options:use_need_wakeup=false to disable it.

For virtual interface, it's better set use_need_wakeup=false, since
the virtual device's AF_XDP xmit is synchronous: the sendto syscall
enters kernel and process the TX packet on tx queue directly.

I tested on kernel 5.3.0-rc3 using its libbpf.  On Intel Xeon E5-2620
v3 2.4GHz system, performance of physical port to physical port improves
from 6.1Mpps to 7.3Mpps. Testing on 5.2.0-rc6 using libbpf from 5.3.0-rc3
does not work due to libbpf API change. Users have to use the older
libbpf for older kernel.

Suggested-by: Ilya Maximets <i.maximets@samsung.com>
Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: 0-day Robot <robot@bytheb.org>
  • Loading branch information
williamtu authored and ovsrobot committed Sep 5, 2019
1 parent d11f4cb commit 0e365ba
Show file tree
Hide file tree
Showing 5 changed files with 114 additions and 14 deletions.
14 changes: 11 additions & 3 deletions Documentation/intro/install/afxdp.rst
Expand Up @@ -176,9 +176,17 @@ in :doc:`general`::
ovs-vswitchd ...
ovs-vsctl -- add-br br0 -- set Bridge br0 datapath_type=netdev

Make sure your device driver support AF_XDP, and to use 1 PMD (on core 4)
on 1 queue (queue 0) device, configure these options: **pmd-cpu-mask,
pmd-rxq-affinity, and n_rxq**. The **xdpmode** can be "drv" or "skb"::
Make sure your device driver support AF_XDP, netdev-afxdp supports
the following additional options (see man ovs-vswitchd.conf.db for
more details):

* **xdpmode**: use "drv" for driver mode, or "skb" for skb mode.

* **use_need_wakeup**: enable by setting to "true", otherwise "false"

For example, to use 1 PMD (on core 4) on 1 queue (queue 0) device,
configure these options: **pmd-cpu-mask, pmd-rxq-affinity, and n_rxq**.
The **xdpmode** can be "drv" or "skb"::

ethtool -L enp2s0 combined 1
ovs-vsctl set Open_vSwitch . other_config:pmd-cpu-mask=0x10
Expand Down
5 changes: 5 additions & 0 deletions acinclude.m4
Expand Up @@ -276,6 +276,11 @@ AC_DEFUN([OVS_CHECK_LINUX_AF_XDP], [
[Define to 1 if AF_XDP support is available and enabled.])
LIBBPF_LDADD=" -lbpf -lelf"
AC_SUBST([LIBBPF_LDADD])
AC_CHECK_DECL([xsk_ring_prod__needs_wakeup], [
AC_DEFINE([HAVE_XDP_NEED_WAKEUP], [1],
[XDP need wakeup support detected in xsk.h.])
], [], [#include <bpf/xsk.h>])
fi
AM_CONDITIONAL([HAVE_AF_XDP], test "$AF_XDP_ENABLE" = true)
])
Expand Down
94 changes: 83 additions & 11 deletions lib/netdev-afxdp.c
Expand Up @@ -26,6 +26,7 @@
#include <linux/rtnetlink.h>
#include <linux/if_xdp.h>
#include <net/if.h>
#include <poll.h>
#include <stdlib.h>
#include <sys/resource.h>
#include <sys/socket.h>
Expand Down Expand Up @@ -82,7 +83,7 @@ BUILD_ASSERT_DECL(PROD_NUM_DESCS == CONS_NUM_DESCS);
#define UMEM2DESC(elem, base) ((uint64_t)((char *)elem - (char *)base))

static struct xsk_socket_info *xsk_configure(int ifindex, int xdp_queue_id,
int mode);
int mode, bool use_need_wakeup);
static void xsk_remove_xdp_program(uint32_t ifindex, int xdpmode);
static void xsk_destroy(struct xsk_socket_info *xsk);
static int xsk_configure_all(struct netdev *netdev);
Expand Down Expand Up @@ -117,6 +118,49 @@ struct xsk_socket_info {
atomic_uint64_t tx_dropped;
};

#ifdef HAVE_XDP_NEED_WAKEUP
static inline void
xsk_rx_wakeup_if_needed(struct xsk_umem_info *umem,
struct netdev *netdev, int fd)
{
struct pollfd pfd;
int ret;

if (xsk_ring_prod__needs_wakeup(&umem->fq)) {
pfd.fd = fd;
pfd.events = POLLIN;

ret = poll(&pfd, 1, 0);
if (OVS_UNLIKELY(ret < 0)) {
VLOG_WARN_RL(&rl, "%s: error polling rx fd: %s.",
netdev_get_name(netdev),
ovs_strerror(errno));
}
}
}

static inline bool
xsk_tx_need_wakeup(struct xsk_socket_info *xsk_info)
{
return xsk_ring_prod__needs_wakeup(&xsk_info->tx);
}

#else /* !HAVE_XDP_NEED_WAKEUP */
static inline void
xsk_rx_wakeup_if_needed(struct xsk_umem_info *umem OVS_UNUSED,
struct netdev *netdev OVS_UNUSED,
int fd OVS_UNUSED)
{
/* Nothing. */
}

static inline bool
xsk_tx_need_wakeup(struct xsk_socket_info *xsk_info OVS_UNUSED)
{
return true;
}
#endif /* HAVE_XDP_NEED_WAKEUP */

static void
netdev_afxdp_cleanup_unused_pool(struct unused_pool *pool)
{
Expand Down Expand Up @@ -234,7 +278,7 @@ xsk_configure_umem(void *buffer, uint64_t size, int xdpmode)

static struct xsk_socket_info *
xsk_configure_socket(struct xsk_umem_info *umem, uint32_t ifindex,
uint32_t queue_id, int xdpmode)
uint32_t queue_id, int xdpmode, bool use_need_wakeup)
{
struct xsk_socket_config cfg;
struct xsk_socket_info *xsk;
Expand All @@ -257,6 +301,12 @@ xsk_configure_socket(struct xsk_umem_info *umem, uint32_t ifindex,
cfg.xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST | XDP_FLAGS_SKB_MODE;
}

if (use_need_wakeup) {
#ifdef HAVE_XDP_NEED_WAKEUP
cfg.bind_flags |= XDP_USE_NEED_WAKEUP;
#endif
}

if (if_indextoname(ifindex, devname) == NULL) {
VLOG_ERR("ifindex %d to devname failed (%s)",
ifindex, ovs_strerror(errno));
Expand Down Expand Up @@ -311,7 +361,8 @@ xsk_configure_socket(struct xsk_umem_info *umem, uint32_t ifindex,
}

static struct xsk_socket_info *
xsk_configure(int ifindex, int xdp_queue_id, int xdpmode)
xsk_configure(int ifindex, int xdp_queue_id, int xdpmode,
bool use_need_wakeup)
{
struct xsk_socket_info *xsk;
struct xsk_umem_info *umem;
Expand All @@ -334,7 +385,8 @@ xsk_configure(int ifindex, int xdp_queue_id, int xdpmode)

VLOG_DBG("Allocated umem pool at 0x%"PRIxPTR, (uintptr_t) umem);

xsk = xsk_configure_socket(umem, ifindex, xdp_queue_id, xdpmode);
xsk = xsk_configure_socket(umem, ifindex, xdp_queue_id, xdpmode,
use_need_wakeup);
if (!xsk) {
/* Clean up umem and xpacket pool. */
if (xsk_umem__delete(umem->umem)) {
Expand Down Expand Up @@ -365,9 +417,12 @@ xsk_configure_all(struct netdev *netdev)

/* Configure each queue. */
for (i = 0; i < n_rxq; i++) {
VLOG_INFO("%s: configure queue %d mode %s", __func__, i,
dev->xdpmode == XDP_COPY ? "SKB" : "DRV");
xsk_info = xsk_configure(ifindex, i, dev->xdpmode);
VLOG_INFO("%s: configure queue %d mode %s use_need_wakeup %s.",
__func__, i,
dev->xdpmode == XDP_COPY ? "SKB" : "DRV",
dev->use_need_wakeup ? "true" : "false");
xsk_info = xsk_configure(ifindex, i, dev->xdpmode,
dev->use_need_wakeup);
if (!xsk_info) {
VLOG_ERR("Failed to create AF_XDP socket on queue %d.", i);
dev->xsks[i] = NULL;
Expand Down Expand Up @@ -459,6 +514,7 @@ netdev_afxdp_set_config(struct netdev *netdev, const struct smap *args,
struct netdev_linux *dev = netdev_linux_cast(netdev);
const char *str_xdpmode;
int xdpmode, new_n_rxq;
bool need_wakeup;

ovs_mutex_lock(&dev->mutex);
new_n_rxq = MAX(smap_get_int(args, "n_rxq", NR_QUEUE), 1);
Expand All @@ -481,10 +537,14 @@ netdev_afxdp_set_config(struct netdev *netdev, const struct smap *args,
return EINVAL;
}

need_wakeup = smap_get_bool(args, "use_need_wakeup", true);

if (dev->requested_n_rxq != new_n_rxq
|| dev->requested_xdpmode != xdpmode) {
|| dev->requested_xdpmode != xdpmode
|| dev->requested_need_wakeup != need_wakeup) {
dev->requested_n_rxq = new_n_rxq;
dev->requested_xdpmode = xdpmode;
dev->requested_need_wakeup = need_wakeup;
netdev_request_reconfigure(netdev);
}
ovs_mutex_unlock(&dev->mutex);
Expand All @@ -500,6 +560,8 @@ netdev_afxdp_get_config(const struct netdev *netdev, struct smap *args)
smap_add_format(args, "n_rxq", "%d", netdev->n_rxq);
smap_add_format(args, "xdpmode", "%s",
dev->xdpmode == XDP_ZEROCOPY ? "drv" : "skb");
smap_add_format(args, "use_need_wakeup", "%s",
dev->use_need_wakeup ? "true" : "false");
ovs_mutex_unlock(&dev->mutex);
return 0;
}
Expand All @@ -515,6 +577,7 @@ netdev_afxdp_reconfigure(struct netdev *netdev)

if (netdev->n_rxq == dev->requested_n_rxq
&& dev->xdpmode == dev->requested_xdpmode
&& dev->use_need_wakeup == dev->requested_need_wakeup
&& dev->xsks) {
goto out;
}
Expand All @@ -538,6 +601,7 @@ netdev_afxdp_reconfigure(struct netdev *netdev)
* when no device is in DRV mode.
*/
}
dev->use_need_wakeup = dev->requested_need_wakeup;

err = xsk_configure_all(netdev);
if (err) {
Expand Down Expand Up @@ -660,6 +724,9 @@ netdev_afxdp_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet_batch *batch,

rcvd = xsk_ring_cons__peek(&xsk_info->rx, BATCH_SIZE, &idx_rx);
if (!rcvd) {
if (dev->use_need_wakeup) {
xsk_rx_wakeup_if_needed(umem, netdev, rx->fd);
}
return EAGAIN;
}

Expand Down Expand Up @@ -704,11 +771,15 @@ netdev_afxdp_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet_batch *batch,
}

static inline int
kick_tx(struct xsk_socket_info *xsk_info, int xdpmode)
kick_tx(struct xsk_socket_info *xsk_info, int xdpmode, bool use_need_wakeup)
{
int ret, retries;
static const int KERNEL_TX_BATCH_SIZE = 16;

if (use_need_wakeup && !xsk_tx_need_wakeup(xsk_info)) {
return 0;
}

/* In SKB_MODE packet transmission is synchronous, and the kernel xmits
* only TX_BATCH_SIZE(16) packets for a single sendmsg syscall.
* So, we have to kick the kernel (n_packets / 16) times to be sure that
Expand Down Expand Up @@ -876,7 +947,7 @@ __netdev_afxdp_batch_send(struct netdev *netdev, int qid,
atomic_add_relaxed(&xsk_info->tx_dropped, batch->count, &orig);
COVERAGE_INC(afxdp_tx_full);
afxdp_complete_tx(xsk_info);
kick_tx(xsk_info, dev->xdpmode);
kick_tx(xsk_info, dev->xdpmode, dev->use_need_wakeup);
error = ENOMEM;
goto out;
}
Expand All @@ -900,7 +971,7 @@ __netdev_afxdp_batch_send(struct netdev *netdev, int qid,
xsk_ring_prod__submit(&xsk_info->tx, batch->count);
xsk_info->outstanding_tx += batch->count;

ret = kick_tx(xsk_info, dev->xdpmode);
ret = kick_tx(xsk_info, dev->xdpmode, dev->use_need_wakeup);
if (OVS_UNLIKELY(ret)) {
VLOG_WARN_RL(&rl, "%s: error sending AF_XDP packet: %s.",
netdev_get_name(netdev), ovs_strerror(ret));
Expand Down Expand Up @@ -970,6 +1041,7 @@ netdev_afxdp_construct(struct netdev *netdev)

dev->requested_n_rxq = NR_QUEUE;
dev->requested_xdpmode = XDP_COPY;
dev->requested_need_wakeup = true;

dev->xsks = NULL;
dev->tx_locks = NULL;
Expand Down
2 changes: 2 additions & 0 deletions lib/netdev-linux-private.h
Expand Up @@ -102,6 +102,8 @@ struct netdev_linux {
int requested_n_rxq;
int xdpmode; /* AF_XDP running mode: driver or skb. */
int requested_xdpmode;
bool use_need_wakeup;
bool requested_need_wakeup;
struct ovs_spin *tx_locks; /* spin lock array for TX queues. */
#endif
};
Expand Down
13 changes: 13 additions & 0 deletions vswitchd/vswitch.xml
Expand Up @@ -3147,6 +3147,19 @@ ovs-vsctl add-port br0 p0 -- set Interface p0 type=patch options:peer=p1 \
</p>
</column>

<column name="options" key="use_need_wakeup"
type='{"type": "boolean"}'>
<p>
Specifies whether to use need_wakeup feature in afxdp netdev.
(required kernel version later than 5.3.0-rc1)
If enabled, OVS explicitly wakes up the kernel RX, using poll()
syscall and wakes up TX, using sendto() syscall. For physical
devices, this feature improves the performance by avoiding
unnecessary sendto syscalls.
Defaults is true.
</p>
</column>

<column name="options" key="vhost-server-path"
type='{"type": "string"}'>
<p>
Expand Down

0 comments on commit 0e365ba

Please sign in to comment.