Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

XDP-hints kfuncs for Intel driver igc #4802

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions drivers/net/ethernet/intel/igc/igc.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include <linux/ptp_clock_kernel.h>
#include <linux/timecounter.h>
#include <linux/net_tstamp.h>
#include <linux/bitfield.h>

#include "igc_hw.h"

Expand Down Expand Up @@ -311,6 +312,33 @@ extern char igc_driver_name[];
#define IGC_MRQC_RSS_FIELD_IPV4_UDP 0x00400000
#define IGC_MRQC_RSS_FIELD_IPV6_UDP 0x00800000

/* RSS Type values as they appear in the RX descriptor write-back format.
 *
 * The values are spelled out explicitly because they mirror the numbers the
 * hardware places in the descriptor; they are also used as indexes into
 * lookup tables sized IGC_RSS_TYPE_MAX_TABLE.
 */
enum igc_rss_type_num {
	IGC_RSS_TYPE_NO_HASH		= 0,
	IGC_RSS_TYPE_HASH_TCP_IPV4	= 1,
	IGC_RSS_TYPE_HASH_IPV4		= 2,
	IGC_RSS_TYPE_HASH_TCP_IPV6	= 3,
	IGC_RSS_TYPE_HASH_IPV6_EX	= 4,
	IGC_RSS_TYPE_HASH_IPV6		= 5,
	IGC_RSS_TYPE_HASH_TCP_IPV6_EX	= 6,
	IGC_RSS_TYPE_HASH_UDP_IPV4	= 7,
	IGC_RSS_TYPE_HASH_UDP_IPV6	= 8,
	IGC_RSS_TYPE_HASH_UDP_IPV6_EX	= 9,
	IGC_RSS_TYPE_MAX		= 10,
};
/* Table size covers every value the 4-bit mask below can produce */
#define IGC_RSS_TYPE_MAX_TABLE		16
#define IGC_RSS_TYPE_MASK		GENMASK(3, 0) /* 4-bits (3:0) = mask 0x0F */

/* igc_rss_type - extract the RSS type field from an Rx descriptor
 * @rx_desc: Rx descriptor in write-back format
 *
 * Returns the 4-bit RSS Type number selected by IGC_RSS_TYPE_MASK.
 */
static inline u32 igc_rss_type(const union igc_adv_rx_desc *rx_desc)
{
	u32 rss_type;

	/* RSS Type lives in bits (3:0); numbers 0-9 are defined, anything
	 * above 9 is reserved. The same bits are reachable through the u16
	 * wb.lower.lo_dword.hs_rss.pkt_info, but reading the u32
	 * wb.lower.lo_dword.data is slightly faster.
	 */
	rss_type = le32_get_bits(rx_desc->wb.lower.lo_dword.data,
				 IGC_RSS_TYPE_MASK);

	return rss_type;
}

/* Interrupt defines */
#define IGC_START_ITR 648 /* ~6000 ints/sec */
#define IGC_4K_ITR 980
Expand Down Expand Up @@ -471,6 +499,13 @@ struct igc_rx_buffer {
};
};

/* context wrapper around xdp_buff to provide access to descriptor metadata
 *
 * xsk_buff_to_igc_ctx() casts a struct xdp_buff pointer directly to
 * struct igc_xdp_buff, which relies on @xdp being the first member.
 * Do not reorder the fields.
 */
struct igc_xdp_buff {
struct xdp_buff xdp;
/* RX descriptor of the frame currently being processed */
union igc_adv_rx_desc *rx_desc;
/* Only written by the RX paths when the descriptor has the timestamp
 * indication set, so readers must test that bit before using it.
 */
ktime_t rx_ts; /* data indication bit IGC_RXDADV_STAT_TSIP */
};

struct igc_q_vector {
struct igc_adapter *adapter; /* backlink */
void __iomem *itr_register;
Expand Down
94 changes: 83 additions & 11 deletions drivers/net/ethernet/intel/igc/igc_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -1677,14 +1677,36 @@ static void igc_rx_checksum(struct igc_ring *ring,
le32_to_cpu(rx_desc->wb.upper.status_error));
}

/* Mapping HW RSS Type to enum pkt_hash_types
 *
 * Indexed by the 4-bit RSS Type taken from the RX descriptor (see
 * igc_rss_type()). The table is read-only and private to this file, hence
 * static const. It is sized for every value the 4-bit mask can yield so a
 * lookup never needs a bounds check.
 */
static const enum pkt_hash_types igc_rss_type_table[IGC_RSS_TYPE_MAX_TABLE] = {
	[IGC_RSS_TYPE_NO_HASH]		= PKT_HASH_TYPE_L2,
	[IGC_RSS_TYPE_HASH_TCP_IPV4]	= PKT_HASH_TYPE_L4,
	[IGC_RSS_TYPE_HASH_IPV4]	= PKT_HASH_TYPE_L3,
	[IGC_RSS_TYPE_HASH_TCP_IPV6]	= PKT_HASH_TYPE_L4,
	[IGC_RSS_TYPE_HASH_IPV6_EX]	= PKT_HASH_TYPE_L3,
	[IGC_RSS_TYPE_HASH_IPV6]	= PKT_HASH_TYPE_L3,
	[IGC_RSS_TYPE_HASH_TCP_IPV6_EX]	= PKT_HASH_TYPE_L4,
	[IGC_RSS_TYPE_HASH_UDP_IPV4]	= PKT_HASH_TYPE_L4,
	[IGC_RSS_TYPE_HASH_UDP_IPV6]	= PKT_HASH_TYPE_L4,
	[IGC_RSS_TYPE_HASH_UDP_IPV6_EX]	= PKT_HASH_TYPE_L4,
	[10] = PKT_HASH_TYPE_NONE, /* RSS Type above 9 "Reserved" by HW  */
	[11] = PKT_HASH_TYPE_NONE, /* keep array sized for SW bit-mask   */
	[12] = PKT_HASH_TYPE_NONE, /* to handle future HW revisions      */
	[13] = PKT_HASH_TYPE_NONE,
	[14] = PKT_HASH_TYPE_NONE,
	[15] = PKT_HASH_TYPE_NONE,
};

static inline void igc_rx_hash(struct igc_ring *ring,
union igc_adv_rx_desc *rx_desc,
struct sk_buff *skb)
{
if (ring->netdev->features & NETIF_F_RXHASH)
skb_set_hash(skb,
le32_to_cpu(rx_desc->wb.lower.hi_dword.rss),
PKT_HASH_TYPE_L3);
if (ring->netdev->features & NETIF_F_RXHASH) {
u32 rss_hash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
u32 rss_type = igc_rss_type(rx_desc);

skb_set_hash(skb, rss_hash, igc_rss_type_table[rss_type]);
}
}

static void igc_rx_vlan(struct igc_ring *rx_ring,
Expand Down Expand Up @@ -2201,6 +2223,8 @@ static bool igc_alloc_rx_buffers_zc(struct igc_ring *ring, u16 count)
if (!count)
return ok;

XSK_CHECK_PRIV_TYPE(struct igc_xdp_buff);

desc = IGC_RX_DESC(ring, i);
bi = &ring->rx_buffer_info[i];
i -= ring->count;
Expand Down Expand Up @@ -2485,8 +2509,8 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
union igc_adv_rx_desc *rx_desc;
struct igc_rx_buffer *rx_buffer;
unsigned int size, truesize;
struct igc_xdp_buff ctx;
ktime_t timestamp = 0;
struct xdp_buff xdp;
int pkt_offset = 0;
void *pktbuf;

Expand Down Expand Up @@ -2515,18 +2539,20 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
if (igc_test_staterr(rx_desc, IGC_RXDADV_STAT_TSIP)) {
timestamp = igc_ptp_rx_pktstamp(q_vector->adapter,
pktbuf);
ctx.rx_ts = timestamp;
pkt_offset = IGC_TS_HDR_LEN;
size -= IGC_TS_HDR_LEN;
}

if (!skb) {
xdp_init_buff(&xdp, truesize, &rx_ring->xdp_rxq);
xdp_prepare_buff(&xdp, pktbuf - igc_rx_offset(rx_ring),
xdp_init_buff(&ctx.xdp, truesize, &rx_ring->xdp_rxq);
xdp_prepare_buff(&ctx.xdp, pktbuf - igc_rx_offset(rx_ring),
igc_rx_offset(rx_ring) + pkt_offset,
size, true);
xdp_buff_clear_frags_flag(&xdp);
xdp_buff_clear_frags_flag(&ctx.xdp);
ctx.rx_desc = rx_desc;

skb = igc_xdp_run_prog(adapter, &xdp);
skb = igc_xdp_run_prog(adapter, &ctx.xdp);
}

if (IS_ERR(skb)) {
Expand All @@ -2548,9 +2574,9 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
} else if (skb)
igc_add_rx_frag(rx_ring, rx_buffer, skb, size);
else if (ring_uses_build_skb(rx_ring))
skb = igc_build_skb(rx_ring, rx_buffer, &xdp);
skb = igc_build_skb(rx_ring, rx_buffer, &ctx.xdp);
else
skb = igc_construct_skb(rx_ring, rx_buffer, &xdp,
skb = igc_construct_skb(rx_ring, rx_buffer, &ctx.xdp,
timestamp);

/* exit if we failed to retrieve a buffer */
Expand Down Expand Up @@ -2651,6 +2677,15 @@ static void igc_dispatch_skb_zc(struct igc_q_vector *q_vector,
napi_gro_receive(&q_vector->napi, skb);
}

/* Convert an xdp_buff used by the zero-copy path into the igc context wrapper */
static struct igc_xdp_buff *xsk_buff_to_igc_ctx(struct xdp_buff *xdp)
{
	struct igc_xdp_buff *ctx;

	/* The xdp_buff pointer used by the ZC code path is allocated as an
	 * xdp_buff_xsk. The igc_xdp_buff shares its layout with xdp_buff_xsk,
	 * and the private igc_xdp_buff fields fall into xdp_buff_xsk->cb.
	 */
	ctx = (struct igc_xdp_buff *)xdp;

	return ctx;
}

static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget)
{
struct igc_adapter *adapter = q_vector->adapter;
Expand All @@ -2669,6 +2704,7 @@ static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget)
while (likely(total_packets < budget)) {
union igc_adv_rx_desc *desc;
struct igc_rx_buffer *bi;
struct igc_xdp_buff *ctx;
ktime_t timestamp = 0;
unsigned int size;
int res;
Expand All @@ -2686,9 +2722,13 @@ static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget)

bi = &ring->rx_buffer_info[ntc];

ctx = xsk_buff_to_igc_ctx(bi->xdp);
ctx->rx_desc = desc;

if (igc_test_staterr(desc, IGC_RXDADV_STAT_TSIP)) {
timestamp = igc_ptp_rx_pktstamp(q_vector->adapter,
bi->xdp->data);
ctx->rx_ts = timestamp;

bi->xdp->data += IGC_TS_HDR_LEN;

Expand Down Expand Up @@ -6443,6 +6483,36 @@ u32 igc_rd32(struct igc_hw *hw, u32 reg)
return value;
}

/* XDP metadata callback: report the RX timestamp stored in the igc context.
 *
 * Writes ctx->rx_ts to @timestamp and returns 0 when the RX descriptor has
 * IGC_RXDADV_STAT_TSIP set; otherwise returns -ENODATA and leaves
 * @timestamp untouched.
 */
static int igc_xdp_rx_timestamp(const struct xdp_md *_ctx, u64 *timestamp)
{
	const struct igc_xdp_buff *ctx = (void *)_ctx;

	/* rx_ts is only valid when the descriptor flags a timestamp */
	if (!igc_test_staterr(ctx->rx_desc, IGC_RXDADV_STAT_TSIP))
		return -ENODATA;

	*timestamp = ctx->rx_ts;

	return 0;
}

/* XDP metadata callback: report the RSS hash from the RX descriptor.
 *
 * Writes the CPU-endian RSS hash to @hash and returns 0 when the receiving
 * netdev has NETIF_F_RXHASH enabled; otherwise returns -ENODATA and leaves
 * @hash untouched.
 */
static int igc_xdp_rx_hash(const struct xdp_md *_ctx, u32 *hash)
{
	const struct igc_xdp_buff *ctx = (void *)_ctx;

	if (ctx->xdp.rxq->dev->features & NETIF_F_RXHASH) {
		*hash = le32_to_cpu(ctx->rx_desc->wb.lower.hi_dword.rss);
		return 0;
	}

	return -ENODATA;
}

/* XDP RX metadata callbacks exposed by igc. Only referenced when the netdev
 * is set up in this file, so the table is kept file-local (static).
 */
static const struct xdp_metadata_ops igc_xdp_metadata_ops = {
	.xmo_rx_timestamp	= igc_xdp_rx_timestamp,
	.xmo_rx_hash		= igc_xdp_rx_hash,
};

/**
* igc_probe - Device Initialization Routine
* @pdev: PCI device information struct
Expand Down Expand Up @@ -6516,6 +6586,7 @@ static int igc_probe(struct pci_dev *pdev,
hw->hw_addr = adapter->io_addr;

netdev->netdev_ops = &igc_netdev_ops;
netdev->xdp_metadata_ops = &igc_xdp_metadata_ops;
igc_ethtool_set_ops(netdev);
netdev->watchdog_timeo = 5 * HZ;

Expand Down Expand Up @@ -6543,6 +6614,7 @@ static int igc_probe(struct pci_dev *pdev,
netdev->features |= NETIF_F_TSO;
netdev->features |= NETIF_F_TSO6;
netdev->features |= NETIF_F_TSO_ECN;
netdev->features |= NETIF_F_RXHASH;
netdev->features |= NETIF_F_RXCSUM;
netdev->features |= NETIF_F_HW_CSUM;
netdev->features |= NETIF_F_SCTP_CRC;
Expand Down
18 changes: 12 additions & 6 deletions tools/testing/selftests/bpf/progs/xdp_hw_metadata.c
Original file line number Diff line number Diff line change
Expand Up @@ -69,15 +69,21 @@ int rx(struct xdp_md *ctx)
return XDP_PASS;
}

if (!bpf_xdp_metadata_rx_timestamp(ctx, &meta->rx_timestamp))
bpf_printk("populated rx_timestamp with %llu", meta->rx_timestamp);
else
if (!bpf_xdp_metadata_rx_timestamp(ctx, &meta->rx_timestamp)) {
meta->xdp_timestamp = bpf_ktime_get_tai_ns();
bpf_printk("populated rx_timestamp with %llu", meta->rx_timestamp);
bpf_printk("populated xdp_timestamp with %llu", meta->xdp_timestamp);
} else {
meta->rx_timestamp = 0; /* Used by AF_XDP as not avail signal */
}

if (!bpf_xdp_metadata_rx_hash(ctx, &meta->rx_hash))
bpf_printk("populated rx_hash with %u", meta->rx_hash);
else
ret = bpf_xdp_metadata_rx_hash(ctx, &meta->rx_hash);
if (ret >= 0) {
bpf_printk("populated rx_hash with 0x%08X", meta->rx_hash);
} else {
bpf_printk("rx_hash not-avail errno:%d", ret);
meta->rx_hash = 0; /* Used by AF_XDP as not avail signal */
}

return bpf_redirect_map(&xsk, ctx->rx_queue_index, XDP_PASS);
}
Expand Down
51 changes: 45 additions & 6 deletions tools/testing/selftests/bpf/xdp_hw_metadata.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
/* Reference program for verifying XDP metadata on real HW. Functional test
* only, doesn't test the performance.
*
* BPF-prog bpf_printk info output can be accessed via
* /sys/kernel/debug/tracing/trace_pipe
*
* RX:
* - UDP 9091 packets are diverted into AF_XDP
* - Metadata verified:
Expand All @@ -27,6 +30,7 @@
#include <sys/mman.h>
#include <net/if.h>
#include <poll.h>
#include <time.h>

#include "xdp_metadata.h"

Expand Down Expand Up @@ -134,14 +138,47 @@ static void refill_rx(struct xsk *xsk, __u64 addr)
}
}

static void verify_xdp_metadata(void *data)
#define NANOSEC_PER_SEC 1000000000 /* 10^9 */
/* Read the clock selected by @clock_id and return its value in nanoseconds.
 * Reports the failure through error() if clock_gettime() fails.
 */
static __u64 gettime(clockid_t clock_id)
{
	struct timespec ts;
	/* See man clock_gettime(2) for type of clock_id's */
	int err = clock_gettime(clock_id, &ts);

	if (err < 0)
		error(err, errno, "Error with clock_gettime()");

	/* Widen the seconds before multiplying so the product is 64-bit */
	return (__u64)ts.tv_sec * NANOSEC_PER_SEC + ts.tv_nsec;
}

static void verify_xdp_metadata(void *data, clockid_t clock_id)
{
struct xdp_meta *meta;

meta = data - sizeof(*meta);

printf("rx_timestamp: %llu\n", meta->rx_timestamp);
printf("rx_hash: %u\n", meta->rx_hash);
printf("rx_hash: 0x%08X\n", meta->rx_hash);
printf("rx_timestamp: %llu (sec:%0.4f)\n", meta->rx_timestamp,
(double)meta->rx_timestamp / NANOSEC_PER_SEC);
if (meta->rx_timestamp) {
__u64 usr_clock = gettime(clock_id);
__u64 xdp_clock = meta->xdp_timestamp;
__s64 delta_X = xdp_clock - meta->rx_timestamp;
__s64 delta_X2U = usr_clock - xdp_clock;

printf("XDP RX-time: %llu (sec:%0.4f) delta sec:%0.4f (%0.3f usec)\n",
xdp_clock, (double)xdp_clock / NANOSEC_PER_SEC,
(double)delta_X / NANOSEC_PER_SEC,
(double)delta_X / 1000);

printf("AF_XDP time: %llu (sec:%0.4f) delta sec:%0.4f (%0.3f usec)\n",
usr_clock, (double)usr_clock / NANOSEC_PER_SEC,
(double)delta_X2U / NANOSEC_PER_SEC,
(double)delta_X2U / 1000);
}

}

static void verify_skb_metadata(int fd)
Expand Down Expand Up @@ -189,7 +226,7 @@ static void verify_skb_metadata(int fd)
printf("skb hwtstamp is not found!\n");
}

static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd)
static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t clock_id)
{
const struct xdp_desc *rx_desc;
struct pollfd fds[rxq + 1];
Expand Down Expand Up @@ -237,7 +274,8 @@ static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd)
addr = xsk_umem__add_offset_to_addr(rx_desc->addr);
printf("%p: rx_desc[%u]->addr=%llx addr=%llx comp_addr=%llx\n",
xsk, idx, rx_desc->addr, addr, comp_addr);
verify_xdp_metadata(xsk_umem__get_data(xsk->umem_area, addr));
verify_xdp_metadata(xsk_umem__get_data(xsk->umem_area, addr),
clock_id);
xsk_ring_cons__release(&xsk->rx, 1);
refill_rx(xsk, comp_addr);
}
Expand Down Expand Up @@ -364,6 +402,7 @@ static void timestamping_enable(int fd, int val)

int main(int argc, char *argv[])
{
clockid_t clock_id = CLOCK_TAI;
int server_fd = -1;
int ret;
int i;
Expand Down Expand Up @@ -437,7 +476,7 @@ int main(int argc, char *argv[])
error(1, -ret, "bpf_xdp_attach");

signal(SIGINT, handle_signal);
ret = verify_metadata(rx_xsk, rxq, server_fd);
ret = verify_metadata(rx_xsk, rxq, server_fd, clock_id);
close(server_fd);
cleanup();
if (ret)
Expand Down
1 change: 1 addition & 0 deletions tools/testing/selftests/bpf/xdp_metadata.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,6 @@

/* Metadata record the selftest BPF program fills in for each frame; the
 * userspace side reads it from the bytes immediately preceding the packet
 * data (data - sizeof(struct xdp_meta)).
 */
struct xdp_meta {
/* HW RX timestamp; the BPF program stores 0 when it is not available */
__u64 rx_timestamp;
/* bpf_ktime_get_tai_ns() sampled by the BPF program when rx_timestamp
 * was populated
 */
__u64 xdp_timestamp;
/* HW RX hash; the BPF program stores 0 when it is not available */
__u32 rx_hash;
};