net/ice: remove incorrect 16B descriptor read block
[ upstream commit 9aee908eddeb6e8f3de402ac5661bca5161809a6 ]

By default the driver works with 32B descriptors, but it has a separate
descriptor read block that reads two descriptors at a time when 16B
descriptors are in use. However, the 32B reads used there are not
guaranteed to be atomic, and on a system where they are not, the
descriptors may be read in an undefined order. Remove the block to avoid
such issues, and just use the regular descriptor reading path for 16B
descriptors when that support is enabled at build time.

Fixes: ae60d3c ("net/ice: support Rx AVX2 vector")

Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
Acked-by: Anatoly Burakov <anatoly.burakov@intel.com>
bruce-richardson authored and kevintraynor committed Mar 8, 2024
1 parent dcf42c7 commit d8f8df9
Showing 1 changed file with 24 additions and 56 deletions.
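
Before the diff, a note on why the removed path was unsafe. With 16B descriptors, one aligned 32B vector load covers two adjacent descriptors, and the driver relies on the rule that a set DD ("descriptor done") bit in descriptor N implies all earlier descriptors are fully written back, which only holds if software reads descriptors from the highest index down. The following is a minimal sketch of the hazard; struct desc16, DD_BIT, and read_pair_unsafe are illustrative names, not the driver's real types:

#include <immintrin.h>
#include <stdint.h>

/* Hypothetical 16-byte RX descriptor layout: the NIC writes qword0
 * first and sets the DD bit in qword1 last, in ascending ring order. */
struct desc16 {
	uint64_t qword0;	/* packet buffer info, written by the NIC */
	uint64_t qword1;	/* status word carrying the DD bit */
};

#define DD_BIT 1ULL

/* One 32B load spanning desc[idx] and desc[idx + 1]. If the CPU splits
 * it into two 16B accesses, the low half (desc[idx]) may be read BEFORE
 * the NIC writes it, while the high half (desc[idx + 1]) is read AFTER
 * the NIC sets its DD bit. The caller then sees DD set on the later
 * descriptor and wrongly trusts the stale earlier one; this is the
 * undefined read order the commit message refers to. */
static inline __m256i
read_pair_unsafe(const struct desc16 *ring, int idx)
{
	/* 32B atomicity is not architecturally guaranteed here. */
	return _mm256_load_si256((const __m256i *)(ring + idx));
}
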
drivers/net/ice/ice_rxtx_vec_avx2.c (24 additions, 56 deletions)
@@ -254,62 +254,30 @@ _ice_recv_raw_pkts_vec_avx2(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 			_mm256_loadu_si256((void *)&sw_ring[i + 4]));
 #endif
 
-		__m256i raw_desc0_1, raw_desc2_3, raw_desc4_5, raw_desc6_7;
-#ifdef RTE_LIBRTE_ICE_16BYTE_RX_DESC
-		/* for AVX we need alignment otherwise loads are not atomic */
-		if (avx_aligned) {
-			/* load in descriptors, 2 at a time, in reverse order */
-			raw_desc6_7 = _mm256_load_si256((void *)(rxdp + 6));
-			rte_compiler_barrier();
-			raw_desc4_5 = _mm256_load_si256((void *)(rxdp + 4));
-			rte_compiler_barrier();
-			raw_desc2_3 = _mm256_load_si256((void *)(rxdp + 2));
-			rte_compiler_barrier();
-			raw_desc0_1 = _mm256_load_si256((void *)(rxdp + 0));
-		} else
-#endif
-		{
-			const __m128i raw_desc7 =
-				_mm_load_si128((void *)(rxdp + 7));
-			rte_compiler_barrier();
-			const __m128i raw_desc6 =
-				_mm_load_si128((void *)(rxdp + 6));
-			rte_compiler_barrier();
-			const __m128i raw_desc5 =
-				_mm_load_si128((void *)(rxdp + 5));
-			rte_compiler_barrier();
-			const __m128i raw_desc4 =
-				_mm_load_si128((void *)(rxdp + 4));
-			rte_compiler_barrier();
-			const __m128i raw_desc3 =
-				_mm_load_si128((void *)(rxdp + 3));
-			rte_compiler_barrier();
-			const __m128i raw_desc2 =
-				_mm_load_si128((void *)(rxdp + 2));
-			rte_compiler_barrier();
-			const __m128i raw_desc1 =
-				_mm_load_si128((void *)(rxdp + 1));
-			rte_compiler_barrier();
-			const __m128i raw_desc0 =
-				_mm_load_si128((void *)(rxdp + 0));
-
-			raw_desc6_7 =
-				_mm256_inserti128_si256
-					(_mm256_castsi128_si256(raw_desc6),
-					 raw_desc7, 1);
-			raw_desc4_5 =
-				_mm256_inserti128_si256
-					(_mm256_castsi128_si256(raw_desc4),
-					 raw_desc5, 1);
-			raw_desc2_3 =
-				_mm256_inserti128_si256
-					(_mm256_castsi128_si256(raw_desc2),
-					 raw_desc3, 1);
-			raw_desc0_1 =
-				_mm256_inserti128_si256
-					(_mm256_castsi128_si256(raw_desc0),
-					 raw_desc1, 1);
-		}
+		const __m128i raw_desc7 = _mm_load_si128((void *)(rxdp + 7));
+		rte_compiler_barrier();
+		const __m128i raw_desc6 = _mm_load_si128((void *)(rxdp + 6));
+		rte_compiler_barrier();
+		const __m128i raw_desc5 = _mm_load_si128((void *)(rxdp + 5));
+		rte_compiler_barrier();
+		const __m128i raw_desc4 = _mm_load_si128((void *)(rxdp + 4));
+		rte_compiler_barrier();
+		const __m128i raw_desc3 = _mm_load_si128((void *)(rxdp + 3));
+		rte_compiler_barrier();
+		const __m128i raw_desc2 = _mm_load_si128((void *)(rxdp + 2));
+		rte_compiler_barrier();
+		const __m128i raw_desc1 = _mm_load_si128((void *)(rxdp + 1));
+		rte_compiler_barrier();
+		const __m128i raw_desc0 = _mm_load_si128((void *)(rxdp + 0));
+
+		const __m256i raw_desc6_7 =
+			_mm256_inserti128_si256(_mm256_castsi128_si256(raw_desc6), raw_desc7, 1);
+		const __m256i raw_desc4_5 =
+			_mm256_inserti128_si256(_mm256_castsi128_si256(raw_desc4), raw_desc5, 1);
+		const __m256i raw_desc2_3 =
+			_mm256_inserti128_si256(_mm256_castsi128_si256(raw_desc2), raw_desc3, 1);
+		const __m256i raw_desc0_1 =
+			_mm256_inserti128_si256(_mm256_castsi128_si256(raw_desc0), raw_desc1, 1);
 
 		if (split_packet) {
 			int j;
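With the 16B-specific block gone, every build uses the single pattern retained above: eight 128-bit loads issued from the highest descriptor down, separated by compiler barriers so the compiler cannot reorder them, then merged pairwise into 256-bit registers. Below is a standalone sketch of that pattern, assuming aligned 16B loads are not torn on the targeted x86 CPUs (the same assumption the original code's comment relied on); compiler_barrier() and read_desc_pair() are illustrative stand-ins for DPDK's rte_compiler_barrier() and the open-coded loop body:

#include <immintrin.h>

/* Stop the compiler from reordering memory accesses across this point;
 * DPDK's rte_compiler_barrier() expands to the same construct. */
#define compiler_barrier() __asm__ __volatile__("" : : : "memory")

/* Read two adjacent 16B descriptors, the later one first, and combine
 * them into one 256-bit register with the earlier descriptor in the
 * low 128-bit lane, matching the raw_desc6_7 ... raw_desc0_1 layout. */
static inline __m256i
read_desc_pair(const __m128i *ring, int idx)
{
	const __m128i hi = _mm_load_si128(ring + idx + 1); /* later desc first */
	compiler_barrier();
	const __m128i lo = _mm_load_si128(ring + idx);

	/* the cast places lo in lane 0 (upper lane undefined until the
	 * insert); inserti128 then places hi in lane 1 */
	return _mm256_inserti128_si256(_mm256_castsi128_si256(lo), hi, 1);
}

Reading each pair later-descriptor-first preserves the DD-bit ordering invariant regardless of whether the platform makes wider loads atomic, which is why this path is now safe for both descriptor sizes.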
