i40e: add XDP support for pass and drop actions
This commit adds basic XDP support for i40e derived NICs. All XDP
actions other than XDP_PASS will end up in XDP_DROP.

Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Björn Töpel authored and Jeff Kirsher committed Jun 21, 2017
1 parent f5c3064 commit 0c8493d
Showing 4 changed files with 194 additions and 31 deletions.
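For context, the kind of program this change enables is an ordinary XDP/eBPF object attached to the i40e netdev. The sketch below is illustrative only -- it is not part of this commit, and the function, section, and file names are arbitrary -- but it exercises exactly the two verdicts the driver now honours, XDP_PASS and XDP_DROP:

/* Illustrative example -- not part of this commit. A minimal XDP program
 * that passes IPv4 frames and drops everything else, i.e. it uses only
 * the XDP_PASS and XDP_DROP verdicts supported by this patch.
 */
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <asm/byteorder.h>

#ifndef __section
#define __section(name) __attribute__((section(name), used))
#endif

__section("xdp")
int xdp_pass_ipv4(struct xdp_md *ctx)
{
	void *data     = (void *)(long)ctx->data;
	void *data_end = (void *)(long)ctx->data_end;
	struct ethhdr *eth = data;

	/* The verifier requires an explicit bounds check before the
	 * Ethernet header may be dereferenced.
	 */
	if (data + sizeof(*eth) > data_end)
		return XDP_DROP;

	return eth->h_proto == __constant_htons(ETH_P_IP) ? XDP_PASS
							   : XDP_DROP;
}

char _license[] __section("license") = "GPL";

With a recent enough iproute2, such an object can typically be attached with something like "ip link set dev <iface> xdp obj xdp_pass_ipv4.o", which reaches the driver through the new ndo_xdp/i40e_xdp() callback added below.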
7 changes: 7 additions & 0 deletions drivers/net/ethernet/intel/i40e/i40e.h
@@ -645,6 +645,8 @@ struct i40e_vsi {
u16 max_frame;
u16 rx_buf_len;

struct bpf_prog *xdp_prog;

/* List of q_vectors allocated to this VSI */
struct i40e_q_vector **q_vectors;
int num_q_vectors;
@@ -972,4 +974,9 @@ i40e_status i40e_get_npar_bw_setting(struct i40e_pf *pf);
i40e_status i40e_set_npar_bw_setting(struct i40e_pf *pf);
i40e_status i40e_commit_npar_bw_setting(struct i40e_pf *pf);
void i40e_print_link_message(struct i40e_vsi *vsi, bool isup);

static inline bool i40e_enabled_xdp_vsi(struct i40e_vsi *vsi)
{
return !!vsi->xdp_prog;
}
#endif /* _I40E_H_ */
87 changes: 87 additions & 0 deletions drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -27,6 +27,7 @@
#include <linux/etherdevice.h>
#include <linux/of_net.h>
#include <linux/pci.h>
#include <linux/bpf.h>

/* Local includes */
#include "i40e.h"
@@ -2395,6 +2396,18 @@ static void i40e_sync_filters_subtask(struct i40e_pf *pf)
}
}

/**
* i40e_max_xdp_frame_size - returns the maximum allowed frame size for XDP
* @vsi: the vsi
**/
static int i40e_max_xdp_frame_size(struct i40e_vsi *vsi)
{
if (PAGE_SIZE >= 8192 || (vsi->back->flags & I40E_FLAG_LEGACY_RX))
return I40E_RXBUFFER_2048;
else
return I40E_RXBUFFER_3072;
}

/**
* i40e_change_mtu - NDO callback to change the Maximum Transfer Unit
* @netdev: network interface device structure
@@ -2408,6 +2421,13 @@ static int i40e_change_mtu(struct net_device *netdev, int new_mtu)
struct i40e_vsi *vsi = np->vsi;
struct i40e_pf *pf = vsi->back;

if (i40e_enabled_xdp_vsi(vsi)) {
int frame_size = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;

if (frame_size > i40e_max_xdp_frame_size(vsi))
return -EINVAL;
}

netdev_info(netdev, "changing MTU from %d to %d\n",
netdev->mtu, new_mtu);
netdev->mtu = new_mtu;
@@ -9311,6 +9331,72 @@ static netdev_features_t i40e_features_check(struct sk_buff *skb,
return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
}

/**
* i40e_xdp_setup - add/remove an XDP program
* @vsi: VSI to be changed
* @prog: XDP program
**/
static int i40e_xdp_setup(struct i40e_vsi *vsi,
struct bpf_prog *prog)
{
int frame_size = vsi->netdev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
struct i40e_pf *pf = vsi->back;
struct bpf_prog *old_prog;
bool need_reset;
int i;

/* Don't allow frames that span over multiple buffers */
if (frame_size > vsi->rx_buf_len)
return -EINVAL;

if (!i40e_enabled_xdp_vsi(vsi) && !prog)
return 0;

/* When turning XDP on->off/off->on we reset and rebuild the rings. */
need_reset = (i40e_enabled_xdp_vsi(vsi) != !!prog);

if (need_reset)
i40e_prep_for_reset(pf, true);

old_prog = xchg(&vsi->xdp_prog, prog);

if (need_reset)
i40e_reset_and_rebuild(pf, true, true);

for (i = 0; i < vsi->num_queue_pairs; i++)
WRITE_ONCE(vsi->rx_rings[i]->xdp_prog, vsi->xdp_prog);

if (old_prog)
bpf_prog_put(old_prog);

return 0;
}

/**
* i40e_xdp - implements ndo_xdp for i40e
* @dev: netdevice
* @xdp: XDP command
**/
static int i40e_xdp(struct net_device *dev,
struct netdev_xdp *xdp)
{
struct i40e_netdev_priv *np = netdev_priv(dev);
struct i40e_vsi *vsi = np->vsi;

if (vsi->type != I40E_VSI_MAIN)
return -EINVAL;

switch (xdp->command) {
case XDP_SETUP_PROG:
return i40e_xdp_setup(vsi, xdp->prog);
case XDP_QUERY_PROG:
xdp->prog_attached = i40e_enabled_xdp_vsi(vsi);
return 0;
default:
return -EINVAL;
}
}

static const struct net_device_ops i40e_netdev_ops = {
.ndo_open = i40e_open,
.ndo_stop = i40e_close,
@@ -9343,6 +9429,7 @@ static const struct net_device_ops i40e_netdev_ops = {
.ndo_features_check = i40e_features_check,
.ndo_bridge_getlink = i40e_ndo_bridge_getlink,
.ndo_bridge_setlink = i40e_ndo_bridge_setlink,
.ndo_xdp = i40e_xdp,
};

/**
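For reference, a worked example of the MTU clamp introduced above (constants from the driver and Ethernet headers: I40E_RXBUFFER_3072 = 3072, I40E_RXBUFFER_2048 = 2048, ETH_HLEN = 14, ETH_FCS_LEN = 4, VLAN_HLEN = 4): on a 4 KiB-page system without I40E_FLAG_LEGACY_RX, i40e_max_xdp_frame_size() returns 3072, so the largest MTU accepted while an XDP program is loaded is 3072 - 14 - 4 - 4 = 3050 bytes; with 8 KiB-or-larger pages or legacy Rx the limit is 2048 - 22 = 2026 bytes, since XDP frames must fit in a single Rx buffer.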
130 changes: 99 additions & 31 deletions drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -26,6 +26,7 @@

#include <linux/prefetch.h>
#include <net/busy_poll.h>
#include <linux/bpf_trace.h>
#include "i40e.h"
#include "i40e_trace.h"
#include "i40e_prototype.h"
@@ -1195,6 +1196,7 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
void i40e_free_rx_resources(struct i40e_ring *rx_ring)
{
i40e_clean_rx_ring(rx_ring);
rx_ring->xdp_prog = NULL;
kfree(rx_ring->rx_bi);
rx_ring->rx_bi = NULL;

@@ -1241,6 +1243,8 @@ int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
rx_ring->next_to_clean = 0;
rx_ring->next_to_use = 0;

rx_ring->xdp_prog = rx_ring->vsi->xdp_prog;

return 0;
err:
kfree(rx_ring->rx_bi);
@@ -1593,6 +1597,7 @@ void i40e_process_skb_fields(struct i40e_ring *rx_ring,
* i40e_cleanup_headers - Correct empty headers
* @rx_ring: rx descriptor ring packet is being transacted on
* @skb: pointer to current skb being fixed
* @rx_desc: pointer to the EOP Rx descriptor
*
* Also address the case where we are pulling data in on pages only
* and as such no data is present in the skb header.
@@ -1602,8 +1607,25 @@ void i40e_process_skb_fields(struct i40e_ring *rx_ring,
*
* Returns true if an error was encountered and skb was freed.
**/
static bool i40e_cleanup_headers(struct i40e_ring *rx_ring, struct sk_buff *skb)
static bool i40e_cleanup_headers(struct i40e_ring *rx_ring, struct sk_buff *skb,
union i40e_rx_desc *rx_desc)

{
/* XDP packets use error pointer so abort at this point */
if (IS_ERR(skb))
return true;

/* ERR_MASK will only have valid bits if EOP set, and
* what we are doing here is actually checking
* I40E_RX_DESC_ERROR_RXE_SHIFT, since it is the zeroth bit in
* the error field
*/
if (unlikely(i40e_test_staterr(rx_desc,
BIT(I40E_RXD_QW1_ERROR_SHIFT)))) {
dev_kfree_skb_any(skb);
return true;
}

/* if eth_skb_pad returns an error the skb was freed */
if (eth_skb_pad(skb))
return true;
@@ -1776,17 +1798,17 @@ static struct i40e_rx_buffer *i40e_get_rx_buffer(struct i40e_ring *rx_ring,
* i40e_construct_skb - Allocate skb and populate it
* @rx_ring: rx descriptor ring to transact packets on
* @rx_buffer: rx buffer to pull data from
* @size: size of buffer to add to skb
* @xdp: xdp_buff pointing to the data
*
* This function allocates an skb. It then populates it with the page
* data from the current receive descriptor, taking care to set up the
* skb correctly.
*/
static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
struct i40e_rx_buffer *rx_buffer,
unsigned int size)
struct xdp_buff *xdp)
{
void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
unsigned int size = xdp->data_end - xdp->data;
#if (PAGE_SIZE < 8192)
unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2;
#else
@@ -1796,9 +1818,9 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
struct sk_buff *skb;

/* prefetch first cache line of first page */
prefetch(va);
prefetch(xdp->data);
#if L1_CACHE_BYTES < 128
prefetch(va + L1_CACHE_BYTES);
prefetch(xdp->data + L1_CACHE_BYTES);
#endif

/* allocate a skb to store the frags */
@@ -1811,10 +1833,11 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
/* Determine available headroom for copy */
headlen = size;
if (headlen > I40E_RX_HDR_SIZE)
headlen = eth_get_headlen(va, I40E_RX_HDR_SIZE);
headlen = eth_get_headlen(xdp->data, I40E_RX_HDR_SIZE);

/* align pull length to size of long to optimize memcpy performance */
memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long)));
memcpy(__skb_put(skb, headlen), xdp->data,
ALIGN(headlen, sizeof(long)));

/* update all of the pointers */
size -= headlen;
@@ -1841,16 +1864,16 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
* i40e_build_skb - Build skb around an existing buffer
* @rx_ring: Rx descriptor ring to transact packets on
* @rx_buffer: Rx buffer to pull data from
* @size: size of buffer to add to skb
* @xdp: xdp_buff pointing to the data
*
* This function builds an skb around an existing Rx buffer, taking care
* to set up the skb correctly and avoid any memcpy overhead.
*/
static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
struct i40e_rx_buffer *rx_buffer,
unsigned int size)
struct xdp_buff *xdp)
{
void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
unsigned int size = xdp->data_end - xdp->data;
#if (PAGE_SIZE < 8192)
unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2;
#else
@@ -1860,12 +1883,12 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
struct sk_buff *skb;

/* prefetch first cache line of first page */
prefetch(va);
prefetch(xdp->data);
#if L1_CACHE_BYTES < 128
prefetch(va + L1_CACHE_BYTES);
prefetch(xdp->data + L1_CACHE_BYTES);
#endif
/* build an skb around the page buffer */
skb = build_skb(va - I40E_SKB_PAD, truesize);
skb = build_skb(xdp->data_hard_start, truesize);
if (unlikely(!skb))
return NULL;

@@ -1944,6 +1967,46 @@ static bool i40e_is_non_eop(struct i40e_ring *rx_ring,
return true;
}

#define I40E_XDP_PASS 0
#define I40E_XDP_CONSUMED 1

/**
* i40e_run_xdp - run an XDP program
* @rx_ring: Rx ring being processed
* @xdp: XDP buffer containing the frame
**/
static struct sk_buff *i40e_run_xdp(struct i40e_ring *rx_ring,
struct xdp_buff *xdp)
{
int result = I40E_XDP_PASS;
struct bpf_prog *xdp_prog;
u32 act;

rcu_read_lock();
xdp_prog = READ_ONCE(rx_ring->xdp_prog);

if (!xdp_prog)
goto xdp_out;

act = bpf_prog_run_xdp(xdp_prog, xdp);
switch (act) {
case XDP_PASS:
break;
default:
bpf_warn_invalid_xdp_action(act);
case XDP_TX:
case XDP_ABORTED:
trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
/* fallthrough -- handle aborts by dropping packet */
case XDP_DROP:
result = I40E_XDP_CONSUMED;
break;
}
xdp_out:
rcu_read_unlock();
return ERR_PTR(-result);
}

/**
* i40e_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
* @rx_ring: rx descriptor ring to transact packets on
Expand All @@ -1966,6 +2029,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
while (likely(total_rx_packets < budget)) {
struct i40e_rx_buffer *rx_buffer;
union i40e_rx_desc *rx_desc;
struct xdp_buff xdp;
unsigned int size;
u16 vlan_tag;
u8 rx_ptype;
@@ -2006,12 +2070,27 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
rx_buffer = i40e_get_rx_buffer(rx_ring, size);

/* retrieve a buffer from the ring */
if (skb)
if (!skb) {
xdp.data = page_address(rx_buffer->page) +
rx_buffer->page_offset;
xdp.data_hard_start = xdp.data -
i40e_rx_offset(rx_ring);
xdp.data_end = xdp.data + size;

skb = i40e_run_xdp(rx_ring, &xdp);
}

if (IS_ERR(skb)) {
total_rx_bytes += size;
total_rx_packets++;
rx_buffer->pagecnt_bias++;
} else if (skb) {
i40e_add_rx_frag(rx_ring, rx_buffer, skb, size);
else if (ring_uses_build_skb(rx_ring))
skb = i40e_build_skb(rx_ring, rx_buffer, size);
else
skb = i40e_construct_skb(rx_ring, rx_buffer, size);
} else if (ring_uses_build_skb(rx_ring)) {
skb = i40e_build_skb(rx_ring, rx_buffer, &xdp);
} else {
skb = i40e_construct_skb(rx_ring, rx_buffer, &xdp);
}

/* exit if we failed to retrieve a buffer */
if (!skb) {
@@ -2026,18 +2105,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
if (i40e_is_non_eop(rx_ring, rx_desc, skb))
continue;

/* ERR_MASK will only have valid bits if EOP set, and
* what we are doing here is actually checking
* I40E_RX_DESC_ERROR_RXE_SHIFT, since it is the zeroth bit in
* the error field
*/
if (unlikely(i40e_test_staterr(rx_desc, BIT(I40E_RXD_QW1_ERROR_SHIFT)))) {
dev_kfree_skb_any(skb);
skb = NULL;
continue;
}

if (i40e_cleanup_headers(rx_ring, skb)) {
if (i40e_cleanup_headers(rx_ring, skb, rx_desc)) {
skb = NULL;
continue;
}
1 change: 1 addition & 0 deletions drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -360,6 +360,7 @@ struct i40e_ring {
void *desc; /* Descriptor ring memory */
struct device *dev; /* Used for DMA mapping */
struct net_device *netdev; /* netdev ring maps to */
struct bpf_prog *xdp_prog;
union {
struct i40e_tx_buffer *tx_bi;
struct i40e_rx_buffer *rx_bi;
