Skip to content

Commit

Permalink
net/ice: add Tx AVX512 offload path
Browse files Browse the repository at this point in the history
Add an alternative Tx data path for AVX512 which supports a subset of
the Tx offload features: Tx checksum offload and VLAN/QinQ insertion
offload.

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
Signed-off-by: Wenzhuo Lu <wenzhuo.lu@intel.com>
Tested-by: Qin Sun <qinx.sun@intel.com>
  • Loading branch information
Ninja-Mobius authored and qzhan16 committed Apr 16, 2021
1 parent b335e72 commit 28f9002
Show file tree
Hide file tree
Showing 6 changed files with 164 additions and 35 deletions.
28 changes: 22 additions & 6 deletions drivers/net/ice/ice_rxtx.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

#include "rte_pmd_ice.h"
#include "ice_rxtx.h"
#include "ice_rxtx_vec_common.h"

#define ICE_TX_CKSUM_OFFLOAD_MASK ( \
PKT_TX_IP_CKSUM | \
Expand Down Expand Up @@ -3267,12 +3268,14 @@ ice_set_tx_function(struct rte_eth_dev *dev)
#ifdef RTE_ARCH_X86
struct ice_tx_queue *txq;
int i;
int tx_check_ret;
bool use_avx512 = false;
bool use_avx2 = false;

if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
if (!ice_tx_vec_dev_check(dev) &&
rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128) {
tx_check_ret = ice_tx_vec_dev_check(dev);
if (tx_check_ret >= 0 &&
rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128) {
ad->tx_vec_allowed = true;
for (i = 0; i < dev->data->nb_tx_queues; i++) {
txq = dev->data->tx_queues[i];
Expand All @@ -3291,12 +3294,15 @@ ice_set_tx_function(struct rte_eth_dev *dev)
PMD_DRV_LOG(NOTICE,
"AVX512 is not supported in build env");
#endif
if (!use_avx512 &&
if (!use_avx512 && tx_check_ret == ICE_VECTOR_PATH &&
(rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1) &&
rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_256)
use_avx2 = true;

if (!use_avx512 && tx_check_ret == ICE_VECTOR_OFFLOAD_PATH)
ad->tx_vec_allowed = false;

} else {
ad->tx_vec_allowed = false;
}
Expand All @@ -3305,9 +3311,18 @@ ice_set_tx_function(struct rte_eth_dev *dev)
if (ad->tx_vec_allowed) {
if (use_avx512) {
#ifdef CC_AVX512_SUPPORT
PMD_DRV_LOG(NOTICE, "Using AVX512 Vector Tx (port %d).",
dev->data->port_id);
dev->tx_pkt_burst = ice_xmit_pkts_vec_avx512;
if (tx_check_ret == ICE_VECTOR_OFFLOAD_PATH) {
PMD_DRV_LOG(NOTICE,
"Using AVX512 OFFLOAD Vector Tx (port %d).",
dev->data->port_id);
dev->tx_pkt_burst =
ice_xmit_pkts_vec_avx512_offload;
} else {
PMD_DRV_LOG(NOTICE,
"Using AVX512 Vector Tx (port %d).",
dev->data->port_id);
dev->tx_pkt_burst = ice_xmit_pkts_vec_avx512;
}
#endif
} else {
PMD_DRV_LOG(DEBUG, "Using %sVector Tx (port %d).",
Expand Down Expand Up @@ -3343,6 +3358,7 @@ static const struct {
#ifdef RTE_ARCH_X86
#ifdef CC_AVX512_SUPPORT
{ ice_xmit_pkts_vec_avx512, "Vector AVX512" },
{ ice_xmit_pkts_vec_avx512_offload, "Offload Vector AVX512" },
#endif
{ ice_xmit_pkts_vec_avx2, "Vector AVX2" },
{ ice_xmit_pkts_vec, "Vector SSE" },
Expand Down
3 changes: 3 additions & 0 deletions drivers/net/ice/ice_rxtx.h
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,9 @@ uint16_t ice_recv_scattered_pkts_vec_avx512(void *rx_queue,
uint16_t nb_pkts);
uint16_t ice_xmit_pkts_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);
uint16_t ice_xmit_pkts_vec_avx512_offload(void *tx_queue,
struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);
int ice_fdir_programming(struct ice_pf *pf, struct ice_fltr_desc *fdir_desc);
int ice_tx_done_cleanup(void *txq, uint32_t free_cnt);
int ice_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc);
Expand Down
2 changes: 1 addition & 1 deletion drivers/net/ice/ice_rxtx_vec_avx2.c
Original file line number Diff line number Diff line change
Expand Up @@ -853,7 +853,7 @@ ice_xmit_fixed_burst_vec_avx2(void *tx_queue, struct rte_mbuf **tx_pkts,
nb_pkts = RTE_MIN(nb_pkts, txq->tx_rs_thresh);

if (txq->nb_tx_free < txq->tx_free_thresh)
ice_tx_free_bufs(txq);
ice_tx_free_bufs_vec(txq);

nb_commit = nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
if (unlikely(nb_pkts == 0))
Expand Down
58 changes: 46 additions & 12 deletions drivers/net/ice/ice_rxtx_vec_avx512.c
Original file line number Diff line number Diff line change
Expand Up @@ -982,23 +982,26 @@ ice_tx_free_bufs_avx512(struct ice_tx_queue *txq)
return txq->tx_rs_thresh;
}

static inline void
static __rte_always_inline void
ice_vtx1(volatile struct ice_tx_desc *txdp,
struct rte_mbuf *pkt, uint64_t flags)
struct rte_mbuf *pkt, uint64_t flags, bool do_offload)
{
uint64_t high_qw =
(ICE_TX_DESC_DTYPE_DATA |
((uint64_t)flags << ICE_TXD_QW1_CMD_S) |
((uint64_t)pkt->data_len << ICE_TXD_QW1_TX_BUF_SZ_S));

if (do_offload)
ice_txd_enable_offload(pkt, &high_qw);

__m128i descriptor = _mm_set_epi64x(high_qw,
pkt->buf_iova + pkt->data_off);
_mm_store_si128((__m128i *)txdp, descriptor);
}

static inline void
ice_vtx(volatile struct ice_tx_desc *txdp,
struct rte_mbuf **pkt, uint16_t nb_pkts, uint64_t flags)
static __rte_always_inline void
ice_vtx(volatile struct ice_tx_desc *txdp, struct rte_mbuf **pkt,
uint16_t nb_pkts, uint64_t flags, bool do_offload)
{
const uint64_t hi_qw_tmpl = (ICE_TX_DESC_DTYPE_DATA |
((uint64_t)flags << ICE_TXD_QW1_CMD_S));
Expand All @@ -1008,18 +1011,26 @@ ice_vtx(volatile struct ice_tx_desc *txdp,
hi_qw_tmpl |
((uint64_t)pkt[3]->data_len <<
ICE_TXD_QW1_TX_BUF_SZ_S);
if (do_offload)
ice_txd_enable_offload(pkt[3], &hi_qw3);
uint64_t hi_qw2 =
hi_qw_tmpl |
((uint64_t)pkt[2]->data_len <<
ICE_TXD_QW1_TX_BUF_SZ_S);
if (do_offload)
ice_txd_enable_offload(pkt[2], &hi_qw2);
uint64_t hi_qw1 =
hi_qw_tmpl |
((uint64_t)pkt[1]->data_len <<
ICE_TXD_QW1_TX_BUF_SZ_S);
if (do_offload)
ice_txd_enable_offload(pkt[1], &hi_qw1);
uint64_t hi_qw0 =
hi_qw_tmpl |
((uint64_t)pkt[0]->data_len <<
ICE_TXD_QW1_TX_BUF_SZ_S);
if (do_offload)
ice_txd_enable_offload(pkt[0], &hi_qw0);

__m512i desc0_3 =
_mm512_set_epi64
Expand All @@ -1036,7 +1047,7 @@ ice_vtx(volatile struct ice_tx_desc *txdp,

/* do any last ones */
while (nb_pkts) {
ice_vtx1(txdp, *pkt, flags);
ice_vtx1(txdp, *pkt, flags, do_offload);
txdp++, pkt++, nb_pkts--;
}
}
Expand All @@ -1051,9 +1062,9 @@ ice_tx_backlog_entry_avx512(struct ice_vec_tx_entry *txep,
txep[i].mbuf = tx_pkts[i];
}

static inline uint16_t
static __rte_always_inline uint16_t
ice_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts)
uint16_t nb_pkts, bool do_offload)
{
struct ice_tx_queue *txq = (struct ice_tx_queue *)tx_queue;
volatile struct ice_tx_desc *txdp;
Expand Down Expand Up @@ -1083,11 +1094,11 @@ ice_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pkts,
if (nb_commit >= n) {
ice_tx_backlog_entry_avx512(txep, tx_pkts, n);

ice_vtx(txdp, tx_pkts, n - 1, flags);
ice_vtx(txdp, tx_pkts, n - 1, flags, do_offload);
tx_pkts += (n - 1);
txdp += (n - 1);

ice_vtx1(txdp, *tx_pkts++, rs);
ice_vtx1(txdp, *tx_pkts++, rs, do_offload);

nb_commit = (uint16_t)(nb_commit - n);

Expand All @@ -1101,7 +1112,7 @@ ice_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pkts,

ice_tx_backlog_entry_avx512(txep, tx_pkts, nb_commit);

ice_vtx(txdp, tx_pkts, nb_commit, flags);
ice_vtx(txdp, tx_pkts, nb_commit, flags, do_offload);

tx_id = (uint16_t)(tx_id + nb_commit);
if (tx_id > txq->tx_next_rs) {
Expand Down Expand Up @@ -1131,7 +1142,30 @@ ice_xmit_pkts_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pkts,

num = (uint16_t)RTE_MIN(nb_pkts, txq->tx_rs_thresh);
ret = ice_xmit_fixed_burst_vec_avx512(tx_queue,
&tx_pkts[nb_tx], num);
&tx_pkts[nb_tx], num, false);
nb_tx += ret;
nb_pkts -= ret;
if (ret < num)
break;
}

return nb_tx;
}

uint16_t
ice_xmit_pkts_vec_avx512_offload(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts)
{
uint16_t nb_tx = 0;
struct ice_tx_queue *txq = (struct ice_tx_queue *)tx_queue;

while (nb_pkts) {
uint16_t ret, num;

num = (uint16_t)RTE_MIN(nb_pkts, txq->tx_rs_thresh);
ret = ice_xmit_fixed_burst_vec_avx512(tx_queue,
&tx_pkts[nb_tx], num, true);

nb_tx += ret;
nb_pkts -= ret;
if (ret < num)
Expand Down
106 changes: 91 additions & 15 deletions drivers/net/ice/ice_rxtx_vec_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ ice_rx_reassemble_packets(struct ice_rx_queue *rxq, struct rte_mbuf **rx_bufs,
}

static __rte_always_inline int
ice_tx_free_bufs(struct ice_tx_queue *txq)
ice_tx_free_bufs_vec(struct ice_tx_queue *txq)
{
struct ice_tx_entry *txep;
uint32_t n;
Expand Down Expand Up @@ -197,7 +197,8 @@ _ice_tx_queue_release_mbufs_vec(struct ice_tx_queue *txq)
#ifdef CC_AVX512_SUPPORT
struct rte_eth_dev *dev = txq->vsi->adapter->eth_dev;

if (dev->tx_pkt_burst == ice_xmit_pkts_vec_avx512) {
if (dev->tx_pkt_burst == ice_xmit_pkts_vec_avx512 ||
dev->tx_pkt_burst == ice_xmit_pkts_vec_avx512_offload) {
struct ice_vec_tx_entry *swr = (void *)txq->sw_ring;

if (txq->tx_tail < i) {
Expand Down Expand Up @@ -267,29 +268,39 @@ ice_rx_vec_queue_default(struct ice_rx_queue *rxq)
return 0;
}

#define ICE_NO_VECTOR_FLAGS ( \
DEV_TX_OFFLOAD_MULTI_SEGS | \
DEV_TX_OFFLOAD_VLAN_INSERT | \
DEV_TX_OFFLOAD_IPV4_CKSUM | \
DEV_TX_OFFLOAD_SCTP_CKSUM | \
DEV_TX_OFFLOAD_UDP_CKSUM | \
DEV_TX_OFFLOAD_TCP_TSO | \
#define ICE_TX_NO_VECTOR_FLAGS ( \
DEV_TX_OFFLOAD_MULTI_SEGS | \
DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM | \
DEV_TX_OFFLOAD_TCP_TSO)

#define ICE_TX_VECTOR_OFFLOAD ( \
DEV_TX_OFFLOAD_VLAN_INSERT | \
DEV_TX_OFFLOAD_QINQ_INSERT | \
DEV_TX_OFFLOAD_IPV4_CKSUM | \
DEV_TX_OFFLOAD_SCTP_CKSUM | \
DEV_TX_OFFLOAD_UDP_CKSUM | \
DEV_TX_OFFLOAD_TCP_CKSUM)

#define ICE_VECTOR_PATH 0
#define ICE_VECTOR_OFFLOAD_PATH 1

static inline int
ice_tx_vec_queue_default(struct ice_tx_queue *txq)
{
if (!txq)
return -1;

if (txq->offloads & ICE_NO_VECTOR_FLAGS)
return -1;

if (txq->tx_rs_thresh < ICE_VPMD_TX_BURST ||
txq->tx_rs_thresh > ICE_TX_MAX_FREE_BUF_SZ)
return -1;

return 0;
if (txq->offloads & ICE_TX_NO_VECTOR_FLAGS)
return -1;

if (txq->offloads & ICE_TX_VECTOR_OFFLOAD)
return ICE_VECTOR_OFFLOAD_PATH;

return ICE_VECTOR_PATH;
}

static inline int
Expand All @@ -312,14 +323,19 @@ ice_tx_vec_dev_check_default(struct rte_eth_dev *dev)
{
int i;
struct ice_tx_queue *txq;
int ret = 0;
int result = 0;

for (i = 0; i < dev->data->nb_tx_queues; i++) {
txq = dev->data->tx_queues[i];
if (ice_tx_vec_queue_default(txq))
ret = ice_tx_vec_queue_default(txq);
if (ret < 0)
return -1;
if (ret == ICE_VECTOR_OFFLOAD_PATH)
result = ret;
}

return 0;
return result;
}

#ifdef CC_AVX2_SUPPORT
Expand Down Expand Up @@ -521,4 +537,64 @@ ice_rxq_rearm_common(struct ice_rx_queue *rxq, __rte_unused bool avx512)
}
#endif

/*
 * Fold the per-packet Tx offload requests of an mbuf into the high quadword
 * of a Tx data descriptor.
 *
 * Reads ol_flags, l2_len, l3_len and vlan_tci from tx_pkt and ORs the
 * resulting offset, L2TAG1 and command fields into *txd_hi. Bits already
 * set in *txd_hi are preserved.
 */
static inline void
ice_txd_enable_offload(struct rte_mbuf *tx_pkt,
		       uint64_t *txd_hi)
{
	const uint64_t flags = tx_pkt->ol_flags;
	const uint64_t l4_req = flags & PKT_TX_L4_MASK;
	uint32_t cmd = 0;
	/* MACLEN is always written: l2_len shifted right by one. */
	uint32_t off = (tx_pkt->l2_len >> 1) << ICE_TX_DESC_LEN_MACLEN_S;
	uint64_t qw;

	/*
	 * L3 header type. Precedence is IP checksum request, then plain
	 * IPv4, then IPv6; whichever matches also sets IPLEN from l3_len.
	 */
	if (flags & (PKT_TX_IP_CKSUM | PKT_TX_IPV4 | PKT_TX_IPV6)) {
		if (flags & PKT_TX_IP_CKSUM)
			cmd |= ICE_TX_DESC_CMD_IIPT_IPV4_CSUM;
		else if (flags & PKT_TX_IPV4)
			cmd |= ICE_TX_DESC_CMD_IIPT_IPV4;
		else
			cmd |= ICE_TX_DESC_CMD_IIPT_IPV6;

		off |= (tx_pkt->l3_len >> 2) << ICE_TX_DESC_LEN_IPLEN_S;
	}

	/*
	 * L4 checksum request. The L4 length field is derived from the
	 * fixed header size of the selected protocol, not from the mbuf.
	 */
	if (l4_req == PKT_TX_TCP_CKSUM) {
		cmd |= ICE_TX_DESC_CMD_L4T_EOFT_TCP;
		off |= (sizeof(struct rte_tcp_hdr) >> 2) <<
			ICE_TX_DESC_LEN_L4_LEN_S;
	} else if (l4_req == PKT_TX_SCTP_CKSUM) {
		cmd |= ICE_TX_DESC_CMD_L4T_EOFT_SCTP;
		off |= (sizeof(struct rte_sctp_hdr) >> 2) <<
			ICE_TX_DESC_LEN_L4_LEN_S;
	} else if (l4_req == PKT_TX_UDP_CKSUM) {
		cmd |= ICE_TX_DESC_CMD_L4T_EOFT_UDP;
		off |= (sizeof(struct rte_udp_hdr) >> 2) <<
			ICE_TX_DESC_LEN_L4_LEN_S;
	}

	qw = ((uint64_t)off) << ICE_TXD_QW1_OFFSET_S;

	/* VLAN/QinQ insertion: request L2TAG1 and carry vlan_tci as the tag. */
	if (flags & (PKT_TX_VLAN | PKT_TX_QINQ)) {
		cmd |= ICE_TX_DESC_CMD_IL2TAG1;
		qw |= ((uint64_t)tx_pkt->vlan_tci << ICE_TXD_QW1_L2TAG1_S);
	}

	qw |= ((uint64_t)cmd) << ICE_TXD_QW1_CMD_S;

	/* Merge everything into the caller's descriptor word in one step. */
	*txd_hi |= qw;
}
#endif
2 changes: 1 addition & 1 deletion drivers/net/ice/ice_rxtx_vec_sse.c
Original file line number Diff line number Diff line change
Expand Up @@ -702,7 +702,7 @@ ice_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
nb_pkts = RTE_MIN(nb_pkts, txq->tx_rs_thresh);

if (txq->nb_tx_free < txq->tx_free_thresh)
ice_tx_free_bufs(txq);
ice_tx_free_bufs_vec(txq);

nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
nb_commit = nb_pkts;
Expand Down

0 comments on commit 28f9002

Please sign in to comment.