From 9adc7b2b84d3e59ffd62a7d359618700a9dbf941 Mon Sep 17 00:00:00 2001 From: Gregory Etelson Date: Thu, 6 Nov 2025 07:52:44 +0200 Subject: [PATCH 01/99] ethdev: make representor parameter more explicit The current format for a port representor parameter is '-a DBDF,representor=pfXvfY'. That parameter syntax describes port representor relative to PCI device DBDF. In that notation VF Y belongs to PF X and PF X is relative to DBDF. The syntax 'pfXvfY' will probe 2 port representors: PF X and VF Y. If we want to refer only to VF Y related to PF X, the parameter must be '(pfX)vfY'. In this case only VF Y representor will be probed. Signed-off-by: Gregory Etelson Acked-by: Andrew Rybchenko Acked-by: Thomas Monjalon Acked-by: Stephen Hemminger --- doc/guides/prog_guide/ethdev/ethdev.rst | 27 ++++++++++++++++++++----- lib/ethdev/ethdev_driver.h | 5 +++++ lib/ethdev/ethdev_private.c | 13 ++++++++++-- 3 files changed, 38 insertions(+), 7 deletions(-) diff --git a/doc/guides/prog_guide/ethdev/ethdev.rst b/doc/guides/prog_guide/ethdev/ethdev.rst index 89eb31a48d4..daaf43ea3be 100644 --- a/doc/guides/prog_guide/ethdev/ethdev.rst +++ b/doc/guides/prog_guide/ethdev/ethdev.rst @@ -379,18 +379,35 @@ parameters to those ports. -a DBDF,representor=vf[0,4,6,9] -a DBDF,representor=vf[0-31] -a DBDF,representor=vf[0,2-4,7,9-11] + + These examples will attach VF representors relative to DBDF. + The VF IDs can be a list, a range or a mix. + SF representors follow the same syntax:: + -a DBDF,representor=sf0 -a DBDF,representor=sf[1,3,5] -a DBDF,representor=sf[0-1023] -a DBDF,representor=sf[0,2-4,7,9-11] + + If there are multiple PFs associated with the same PCI device, + the PF ID must be used to distinguish between representors relative to different PFs:: + -a DBDF,representor=pf1vf0 - -a DBDF,representor=pf[0-1]sf[0-127] - -a DBDF,representor=pf1 + -a DBDF,representor=pf[0-1]vf0 + + The example above will attach 4 representors pf0vf0, pf1vf0, pf0 and pf1. + If only VF representors are required, the PF part must be enclosed with parentheses:: + + -a DBDF,representor=(pf[0-1])vf0 + + The example above will attach 2 representors pf0vf0, pf1vf0. + + List of representors for the same PCI device is enclosed in square brackets:: + -a DBDF,representor=[pf[0-1],pf2vf[0-2],pf3[3,5-8]] - (Multiple representors in one device argument can be represented as a list) -Note: PMDs are not required to support the standard device arguments and users -should consult the relevant PMD documentation to see support devargs. + Note: PMDs may have additional extensions for the representor parameter, and users + should consult the relevant PMD documentation to see support devargs. Extended Statistics API ~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/lib/ethdev/ethdev_driver.h b/lib/ethdev/ethdev_driver.h index db0b3d2c407..1255cd6f2c5 100644 --- a/lib/ethdev/ethdev_driver.h +++ b/lib/ethdev/ethdev_driver.h @@ -2012,6 +2012,10 @@ __rte_internal int rte_eth_switch_domain_free(uint16_t domain_id); +/* Flags for rte_eth_devargs::flags. */ +/* When enclosed in parentheses, the PF representor is not required. 
*/
+#define RTE_ETH_DEVARG_REPRESENTOR_IGNORE_PF RTE_BIT32(0)
+
 /**
  * Generic Ethernet device arguments
  *
@@ -2026,6 +2030,7 @@ struct rte_eth_devargs {
 	/** port/s number to enable on a multi-port single function */
 	uint16_t nb_ports;
 	/** number of ports in ports field */
+	uint32_t flags; /* see RTE_ETH_DEVARG_* */
 	uint16_t representor_ports[RTE_MAX_ETHPORTS];
 	/** representor port/s identifier to enable on device */
 	uint16_t nb_representor_ports;
diff --git a/lib/ethdev/ethdev_private.c b/lib/ethdev/ethdev_private.c
index a881e9c0031..72a0723846b 100644
--- a/lib/ethdev/ethdev_private.c
+++ b/lib/ethdev/ethdev_private.c
@@ -152,11 +152,20 @@ rte_eth_devargs_parse_representor_ports(char *str, void *data)
 		if (str == NULL)
 			goto done;
 	}
-	if (str[0] == 'p' && str[1] == 'f') {
+	/* pfX... or (pfX)... */
+	if ((str[0] == 'p' && str[1] == 'f') ||
+	    (str[0] == '(' && str[1] == 'p' && str[2] == 'f')) {
 		eth_da->type = RTE_ETH_REPRESENTOR_PF;
-		str += 2;
+		if (str[0] == '(')
+			str++; /* advance past leading "(" */
+		str += 2; /* advance past "pf" */
 		str = rte_eth_devargs_process_list(str, eth_da->ports,
 				&eth_da->nb_ports, RTE_DIM(eth_da->ports));
+		if (str != NULL && str[0] == ')') {
+			str++; /* advance past ")" */
+			eth_da->flags =
+				RTE_ETH_DEVARG_REPRESENTOR_IGNORE_PF;
+		}
 		if (str == NULL || str[0] == '\0')
 			goto done;
 	} else if (eth_da->nb_mh_controllers > 0) {

From f87fa31a9304210799698a811e3333015262b5fe Mon Sep 17 00:00:00 2001
From: Gregory Etelson
Date: Thu, 6 Nov 2025 07:52:45 +0200
Subject: [PATCH 02/99] net/mlx5: support PF representor suppression

In a multi-port E-Switch setup, the MLX5 PMD always added the PF
representor port. For example, `representor=pf1vf[0,1]` implicitly
added the PF1 representor port:

```
Port  Name
0     p0
1     p1
2     representor_c0pf1vf0
3     representor_c0pf1vf1
```

The patch adds support for the new representor format that suppresses
PF representor attachment:

Example: `representor=(pf1)vf[0,1]`

```
Port  Name
0     p0
1     representor_c0pf1vf0
2     representor_c0pf1vf1
```

Signed-off-by: Gregory Etelson
---
 drivers/net/mlx5/linux/mlx5_os.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index c742e0f2825..dba3b61b68d 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -2284,6 +2284,12 @@ mlx5_device_mpesw_pci_match(struct ibv_device *ibv,
 	return -1;
 }
 
+static inline bool
+mlx5_ignore_pf_representor(const struct rte_eth_devargs *eth_da)
+{
+	return (eth_da->flags & RTE_ETH_DEVARG_REPRESENTOR_IGNORE_PF) != 0;
+}
+
 /**
  * Register a PCI device within bonding.
  *
@@ -2592,6 +2598,8 @@ mlx5_os_pci_probe_pf(struct mlx5_common_device *cdev,
 		if (list[ns].info.port_name == mpesw) {
 			list[ns].info.master = 1;
 			list[ns].info.representor = 0;
+		} else if (mlx5_ignore_pf_representor(&eth_da)) {
+			continue;
 		} else {
 			list[ns].info.master = 0;
 			list[ns].info.representor = 1;

From c58bdc7a589cc0ca52c8e8e95becd322ad8a4080 Mon Sep 17 00:00:00 2001
From: Chengwen Feng
Date: Tue, 11 Nov 2025 17:13:02 +0800
Subject: [PATCH 03/99] app/testpmd: set DCB forwarding TCs

This commit adds support for specifying the TCs used in DCB forwarding,
with the command:

	set dcb fwd_tc (tc_mask)

The background of this command: when the DCB function is tested based
on txonly forwarding, only some TCs are expected to generate traffic;
this command can be used to specify which TCs are used.
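
For example (illustrative usage; the mask value is arbitrary), to limit
forwarding to TC0 and TC1:

	testpmd> set dcb fwd_tc 0x3
	Enabled DCB forwarding TC list: 0 1
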
Signed-off-by: Chengwen Feng Acked-by: Huisong Li --- app/test-pmd/cmdline.c | 57 +++++++++++++++++++++ app/test-pmd/config.c | 50 +++++++++++++++++- app/test-pmd/testpmd.c | 6 +++ app/test-pmd/testpmd.h | 3 ++ doc/guides/testpmd_app_ug/testpmd_funcs.rst | 8 +++ 5 files changed, 122 insertions(+), 2 deletions(-) diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c index 22afbdbad3a..aa5948fb250 100644 --- a/app/test-pmd/cmdline.c +++ b/app/test-pmd/cmdline.c @@ -511,6 +511,9 @@ static void cmd_help_long_parsed(void *parsed_result, "set fwd (%s)\n" " Set packet forwarding mode.\n\n" + "set dcb fwd_tc (tc_mask)\n" + " Set DCB forwarding on specify TCs, if bit-n in tc-mask is 1, then TC-n's forwarding is enabled\n\n" + "mac_addr add (port_id) (XX:XX:XX:XX:XX:XX)\n" " Add a MAC address on port_id.\n\n" @@ -6224,6 +6227,59 @@ static void cmd_set_fwd_retry_mode_init(void) token_struct->string_data.str = token; } +/* *** set DCB forward TCs *** */ +struct cmd_set_dcb_fwd_tc_result { + cmdline_fixed_string_t set; + cmdline_fixed_string_t dcb; + cmdline_fixed_string_t fwd_tc; + uint8_t tc_mask; +}; + +static void cmd_set_dcb_fwd_tc_parsed(void *parsed_result, + __rte_unused struct cmdline *cl, + __rte_unused void *data) +{ + struct cmd_set_dcb_fwd_tc_result *res = parsed_result; + int i; + if (res->tc_mask == 0) { + fprintf(stderr, "TC mask should not be zero!\n"); + return; + } + printf("Enabled DCB forwarding TC list:"); + dcb_fwd_tc_mask = res->tc_mask; + for (i = 0; i < RTE_ETH_8_TCS; i++) { + if (dcb_fwd_tc_mask & (1u << i)) + printf(" %d", i); + } + printf("\n"); +} + +static cmdline_parse_token_string_t cmd_set_dcb_fwd_tc_set = + TOKEN_STRING_INITIALIZER(struct cmd_set_dcb_fwd_tc_result, + set, "set"); +static cmdline_parse_token_string_t cmd_set_dcb_fwd_tc_dcb = + TOKEN_STRING_INITIALIZER(struct cmd_set_dcb_fwd_tc_result, + dcb, "dcb"); +static cmdline_parse_token_string_t cmd_set_dcb_fwd_tc_fwdtc = + TOKEN_STRING_INITIALIZER(struct cmd_set_dcb_fwd_tc_result, + fwd_tc, "fwd_tc"); +static cmdline_parse_token_num_t cmd_set_dcb_fwd_tc_tcmask = + TOKEN_NUM_INITIALIZER(struct cmd_set_dcb_fwd_tc_result, + tc_mask, RTE_UINT8); + +static cmdline_parse_inst_t cmd_set_dcb_fwd_tc = { + .f = cmd_set_dcb_fwd_tc_parsed, + .data = NULL, + .help_str = "config DCB forwarding on specify TCs, if bit-n in tc-mask is 1, then TC-n's forwarding is enabled, and vice versa.", + .tokens = { + (void *)&cmd_set_dcb_fwd_tc_set, + (void *)&cmd_set_dcb_fwd_tc_dcb, + (void *)&cmd_set_dcb_fwd_tc_fwdtc, + (void *)&cmd_set_dcb_fwd_tc_tcmask, + NULL, + }, +}; + /* *** SET BURST TX DELAY TIME RETRY NUMBER *** */ struct cmd_set_burst_tx_retry_result { cmdline_fixed_string_t set; @@ -14003,6 +14059,7 @@ static cmdline_parse_ctx_t builtin_ctx[] = { &cmd_set_fwd_mask, &cmd_set_fwd_mode, &cmd_set_fwd_retry_mode, + &cmd_set_dcb_fwd_tc, &cmd_set_burst_tx_retry, &cmd_set_promisc_mode_one, &cmd_set_promisc_mode_all, diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c index 8557371488b..88c1e99c5ea 100644 --- a/app/test-pmd/config.c +++ b/app/test-pmd/config.c @@ -5121,12 +5121,48 @@ get_fwd_port_total_tc_num(void) for (i = 0; i < nb_fwd_ports; i++) { (void)rte_eth_dev_get_dcb_info(fwd_ports_ids[i], &dcb_info); - total_tc_num += dcb_info.nb_tcs; + total_tc_num += rte_popcount32(dcb_fwd_tc_mask & ((1u << dcb_info.nb_tcs) - 1)); } return total_tc_num; } +static void +dcb_fwd_tc_update_dcb_info(struct rte_eth_dcb_info *org_dcb_info) +{ + struct rte_eth_dcb_info dcb_info = {0}; + uint32_t i, vmdq_idx; + uint32_t tc = 0; + + 
if (dcb_fwd_tc_mask == DEFAULT_DCB_FWD_TC_MASK) + return; + + /* + * Use compress scheme to update dcb-info. + * E.g. If org_dcb_info->nb_tcs is 4 and dcb_fwd_tc_mask is 0x8, it + * means only enable TC3, then the new dcb-info's nb_tcs is set to + * 1, and also move corresponding tc_rxq and tc_txq info to new + * index. + */ + for (i = 0; i < org_dcb_info->nb_tcs; i++) { + if (!(dcb_fwd_tc_mask & (1u << i))) + continue; + for (vmdq_idx = 0; vmdq_idx < RTE_ETH_MAX_VMDQ_POOL; vmdq_idx++) { + dcb_info.tc_queue.tc_rxq[vmdq_idx][tc].base = + org_dcb_info->tc_queue.tc_rxq[vmdq_idx][i].base; + dcb_info.tc_queue.tc_rxq[vmdq_idx][tc].nb_queue = + org_dcb_info->tc_queue.tc_rxq[vmdq_idx][i].nb_queue; + dcb_info.tc_queue.tc_txq[vmdq_idx][tc].base = + org_dcb_info->tc_queue.tc_txq[vmdq_idx][i].base; + dcb_info.tc_queue.tc_txq[vmdq_idx][tc].nb_queue = + org_dcb_info->tc_queue.tc_txq[vmdq_idx][i].nb_queue; + } + tc++; + } + dcb_info.nb_tcs = tc; + *org_dcb_info = dcb_info; +} + /** * For the DCB forwarding test, each core is assigned on each traffic class. * @@ -5176,11 +5212,17 @@ dcb_fwd_config_setup(void) } } + total_tc_num = get_fwd_port_total_tc_num(); + if (total_tc_num == 0) { + fprintf(stderr, "Error: total forwarding TC num is zero!\n"); + cur_fwd_config.nb_fwd_lcores = 0; + return; + } + cur_fwd_config.nb_fwd_lcores = (lcoreid_t) nb_fwd_lcores; cur_fwd_config.nb_fwd_ports = nb_fwd_ports; cur_fwd_config.nb_fwd_streams = (streamid_t) (nb_rxq * cur_fwd_config.nb_fwd_ports); - total_tc_num = get_fwd_port_total_tc_num(); if (cur_fwd_config.nb_fwd_lcores > total_tc_num) cur_fwd_config.nb_fwd_lcores = total_tc_num; @@ -5190,7 +5232,9 @@ dcb_fwd_config_setup(void) txp = fwd_topology_tx_port_get(rxp); /* get the dcb info on the first RX and TX ports */ (void)rte_eth_dev_get_dcb_info(fwd_ports_ids[rxp], &rxp_dcb_info); + dcb_fwd_tc_update_dcb_info(&rxp_dcb_info); (void)rte_eth_dev_get_dcb_info(fwd_ports_ids[txp], &txp_dcb_info); + dcb_fwd_tc_update_dcb_info(&txp_dcb_info); for (lc_id = 0; lc_id < cur_fwd_config.nb_fwd_lcores; lc_id++) { fwd_lcores[lc_id]->stream_nb = 0; @@ -5238,7 +5282,9 @@ dcb_fwd_config_setup(void) txp = fwd_topology_tx_port_get(rxp); /* get the dcb information on next RX and TX ports */ rte_eth_dev_get_dcb_info(fwd_ports_ids[rxp], &rxp_dcb_info); + dcb_fwd_tc_update_dcb_info(&rxp_dcb_info); rte_eth_dev_get_dcb_info(fwd_ports_ids[txp], &txp_dcb_info); + dcb_fwd_tc_update_dcb_info(&txp_dcb_info); } } diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c index b10f6baee2a..04a4727b882 100644 --- a/app/test-pmd/testpmd.c +++ b/app/test-pmd/testpmd.c @@ -211,6 +211,12 @@ struct fwd_engine * fwd_engines[] = { NULL, }; +/* + * Bitmask for control DCB forwarding for TCs. + * If bit-n in tc-mask is 1, then TC-n's forwarding is enabled, and vice versa. 
+ */ +uint8_t dcb_fwd_tc_mask = DEFAULT_DCB_FWD_TC_MASK; + struct rte_mempool *mempools[RTE_MAX_NUMA_NODES * MAX_SEGS_BUFFER_SPLIT]; uint16_t mempool_flags; diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h index fa46865c67b..1ada0de450d 100644 --- a/app/test-pmd/testpmd.h +++ b/app/test-pmd/testpmd.h @@ -484,6 +484,9 @@ extern cmdline_parse_inst_t cmd_show_set_raw_all; extern cmdline_parse_inst_t cmd_set_flex_is_pattern; extern cmdline_parse_inst_t cmd_set_flex_spec_pattern; +#define DEFAULT_DCB_FWD_TC_MASK 0xFF +extern uint8_t dcb_fwd_tc_mask; + extern uint16_t mempool_flags; /** diff --git a/doc/guides/testpmd_app_ug/testpmd_funcs.rst b/doc/guides/testpmd_app_ug/testpmd_funcs.rst index e423abd40ea..6c4925a868b 100644 --- a/doc/guides/testpmd_app_ug/testpmd_funcs.rst +++ b/doc/guides/testpmd_app_ug/testpmd_funcs.rst @@ -1487,6 +1487,14 @@ Where: * ``pause_time`` (integer): Pause quanta filled in the PFC frame for which interval, remote Tx will be paused. Valid only if Tx pause is on. +Set dcb fwd_tc +~~~~~~~~~~~~~~ + +Config DCB forwarding on specify TCs, if bit-n in tc-mask is 1, then TC-n's +forwarding is enabled, and vice versa:: + + testpmd> set dcb fwd_tc (tc_mask) + Set Rx queue available descriptors threshold ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From 945e9be0a80335b56e637a04b2acb11bcd0816a1 Mon Sep 17 00:00:00 2001 From: Chengwen Feng Date: Tue, 11 Nov 2025 17:13:03 +0800 Subject: [PATCH 04/99] app/testpmd: support multi-cores process one TC Currently, one TC can be processed by only one core, when there are a large number of small packets, this core becomes a bottleneck. This commit supports multi-cores process one TC, the command: set dcb fwd_tc_cores (tc_cores) Signed-off-by: Chengwen Feng Acked-by: Huisong Li --- app/test-pmd/cmdline.c | 48 ++++++++++++ app/test-pmd/config.c | 85 ++++++++++++++++----- app/test-pmd/testpmd.c | 9 +++ app/test-pmd/testpmd.h | 1 + doc/guides/testpmd_app_ug/testpmd_funcs.rst | 8 ++ 5 files changed, 134 insertions(+), 17 deletions(-) diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c index aa5948fb250..c33c66f3271 100644 --- a/app/test-pmd/cmdline.c +++ b/app/test-pmd/cmdline.c @@ -6280,6 +6280,53 @@ static cmdline_parse_inst_t cmd_set_dcb_fwd_tc = { }, }; +/* *** set DCB forward cores per TC *** */ +struct cmd_set_dcb_fwd_tc_cores_result { + cmdline_fixed_string_t set; + cmdline_fixed_string_t dcb; + cmdline_fixed_string_t fwd_tc_cores; + uint8_t tc_cores; +}; + +static void cmd_set_dcb_fwd_tc_cores_parsed(void *parsed_result, + __rte_unused struct cmdline *cl, + __rte_unused void *data) +{ + struct cmd_set_dcb_fwd_tc_cores_result *res = parsed_result; + if (res->tc_cores == 0) { + fprintf(stderr, "Cores per-TC should not be zero!\n"); + return; + } + dcb_fwd_tc_cores = res->tc_cores; + printf("Set cores-per-TC: %u\n", dcb_fwd_tc_cores); +} + +static cmdline_parse_token_string_t cmd_set_dcb_fwd_tc_cores_set = + TOKEN_STRING_INITIALIZER(struct cmd_set_dcb_fwd_tc_cores_result, + set, "set"); +static cmdline_parse_token_string_t cmd_set_dcb_fwd_tc_cores_dcb = + TOKEN_STRING_INITIALIZER(struct cmd_set_dcb_fwd_tc_cores_result, + dcb, "dcb"); +static cmdline_parse_token_string_t cmd_set_dcb_fwd_tc_cores_fwdtccores = + TOKEN_STRING_INITIALIZER(struct cmd_set_dcb_fwd_tc_cores_result, + fwd_tc_cores, "fwd_tc_cores"); +static cmdline_parse_token_num_t cmd_set_dcb_fwd_tc_cores_tccores = + TOKEN_NUM_INITIALIZER(struct cmd_set_dcb_fwd_tc_cores_result, + tc_cores, RTE_UINT8); + +static cmdline_parse_inst_t cmd_set_dcb_fwd_tc_cores = 
{
+	.f = cmd_set_dcb_fwd_tc_cores_parsed,
+	.data = NULL,
+	.help_str = "config DCB forwarding cores per-TC, 1 means one core processes all queues of a TC.",
+	.tokens = {
+		(void *)&cmd_set_dcb_fwd_tc_cores_set,
+		(void *)&cmd_set_dcb_fwd_tc_cores_dcb,
+		(void *)&cmd_set_dcb_fwd_tc_cores_fwdtccores,
+		(void *)&cmd_set_dcb_fwd_tc_cores_tccores,
+		NULL,
+	},
+};
+
 /* *** SET BURST TX DELAY TIME RETRY NUMBER *** */
 struct cmd_set_burst_tx_retry_result {
 	cmdline_fixed_string_t set;
@@ -14060,6 +14107,7 @@ static cmdline_parse_ctx_t builtin_ctx[] = {
 	&cmd_set_fwd_mode,
 	&cmd_set_fwd_retry_mode,
 	&cmd_set_dcb_fwd_tc,
+	&cmd_set_dcb_fwd_tc_cores,
 	&cmd_set_burst_tx_retry,
 	&cmd_set_promisc_mode_one,
 	&cmd_set_promisc_mode_all,
diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c
index 88c1e99c5ea..6ea506254b6 100644
--- a/app/test-pmd/config.c
+++ b/app/test-pmd/config.c
@@ -5112,6 +5112,36 @@ rss_fwd_config_setup(void)
 	}
 }
 
+static int
+dcb_fwd_check_cores_per_tc(void)
+{
+	struct rte_eth_dcb_info dcb_info = {0};
+	uint32_t port, tc, vmdq_idx;
+
+	if (dcb_fwd_tc_cores == 1)
+		return 0;
+
+	for (port = 0; port < nb_fwd_ports; port++) {
+		(void)rte_eth_dev_get_dcb_info(fwd_ports_ids[port], &dcb_info);
+		for (tc = 0; tc < dcb_info.nb_tcs; tc++) {
+			for (vmdq_idx = 0; vmdq_idx < RTE_ETH_MAX_VMDQ_POOL; vmdq_idx++) {
+				if (dcb_info.tc_queue.tc_rxq[vmdq_idx][tc].nb_queue == 0)
+					break;
+				/* make sure nb_rx_queue is divisible by the core count. */
+				if (dcb_info.tc_queue.tc_rxq[vmdq_idx][tc].nb_queue %
+						dcb_fwd_tc_cores)
+					return -1;
+				/* make sure nb_tx_queue is divisible by the core count. */
+				if (dcb_info.tc_queue.tc_txq[vmdq_idx][tc].nb_queue %
+						dcb_fwd_tc_cores)
+					return -1;
+			}
+		}
+	}
+
+	return 0;
+}
+
 static uint16_t
 get_fwd_port_total_tc_num(void)
 {
@@ -5164,14 +5194,17 @@ dcb_fwd_tc_update_dcb_info(struct rte_eth_dcb_info *org_dcb_info)
 }
 
 /**
- * For the DCB forwarding test, each core is assigned on each traffic class.
+ * For the DCB forwarding test, each core is assigned to one traffic class
+ * by default:
+ * Each core is assigned a multi-stream, each stream being composed of
+ * a RX queue to poll on a RX port for input messages, associated with
+ * a TX queue of a TX port where to send forwarded packets. All RX and
+ * TX queues are mapping to the same traffic class.
+ * If VMDQ and DCB co-exist, each traffic class on different POOLs share
+ * the same core.
  *
- * Each core is assigned a multi-stream, each stream being composed of
- * a RX queue to poll on a RX port for input messages, associated with
- * a TX queue of a TX port where to send forwarded packets. All RX and
- * TX queues are mapping to the same traffic class.
- * If VMDQ and DCB co-exist, each traffic class on different POOLs share
- * the same core
+ * If the user sets cores-per-TC to another value (e.g. 2), then multiple
+ * cores will process one TC.
*/ static void dcb_fwd_config_setup(void) @@ -5179,9 +5212,10 @@ dcb_fwd_config_setup(void) struct rte_eth_dcb_info rxp_dcb_info, txp_dcb_info; portid_t txp, rxp = 0; queueid_t txq, rxq = 0; - lcoreid_t lc_id; + lcoreid_t lc_id, target_lcores; uint16_t nb_rx_queue, nb_tx_queue; uint16_t i, j, k, sm_id = 0; + uint16_t sub_core_idx = 0; uint16_t total_tc_num; struct rte_port *port; uint8_t tc = 0; @@ -5212,6 +5246,13 @@ dcb_fwd_config_setup(void) } } + ret = dcb_fwd_check_cores_per_tc(); + if (ret != 0) { + fprintf(stderr, "Error: check forwarding cores-per-TC failed!\n"); + cur_fwd_config.nb_fwd_lcores = 0; + return; + } + total_tc_num = get_fwd_port_total_tc_num(); if (total_tc_num == 0) { fprintf(stderr, "Error: total forwarding TC num is zero!\n"); @@ -5219,12 +5260,17 @@ dcb_fwd_config_setup(void) return; } - cur_fwd_config.nb_fwd_lcores = (lcoreid_t) nb_fwd_lcores; + target_lcores = (lcoreid_t)total_tc_num * (lcoreid_t)dcb_fwd_tc_cores; + if (nb_fwd_lcores < target_lcores) { + fprintf(stderr, "Error: the number of forwarding cores is insufficient!\n"); + cur_fwd_config.nb_fwd_lcores = 0; + return; + } + + cur_fwd_config.nb_fwd_lcores = target_lcores; cur_fwd_config.nb_fwd_ports = nb_fwd_ports; cur_fwd_config.nb_fwd_streams = (streamid_t) (nb_rxq * cur_fwd_config.nb_fwd_ports); - if (cur_fwd_config.nb_fwd_lcores > total_tc_num) - cur_fwd_config.nb_fwd_lcores = total_tc_num; /* reinitialize forwarding streams */ init_fwd_streams(); @@ -5247,10 +5293,12 @@ dcb_fwd_config_setup(void) break; k = fwd_lcores[lc_id]->stream_nb + fwd_lcores[lc_id]->stream_idx; - rxq = rxp_dcb_info.tc_queue.tc_rxq[i][tc].base; - txq = txp_dcb_info.tc_queue.tc_txq[i][tc].base; - nb_rx_queue = rxp_dcb_info.tc_queue.tc_rxq[i][tc].nb_queue; - nb_tx_queue = txp_dcb_info.tc_queue.tc_txq[i][tc].nb_queue; + nb_rx_queue = rxp_dcb_info.tc_queue.tc_rxq[i][tc].nb_queue / + dcb_fwd_tc_cores; + nb_tx_queue = txp_dcb_info.tc_queue.tc_txq[i][tc].nb_queue / + dcb_fwd_tc_cores; + rxq = rxp_dcb_info.tc_queue.tc_rxq[i][tc].base + nb_rx_queue * sub_core_idx; + txq = txp_dcb_info.tc_queue.tc_txq[i][tc].base + nb_tx_queue * sub_core_idx; for (j = 0; j < nb_rx_queue; j++) { struct fwd_stream *fs; @@ -5262,11 +5310,14 @@ dcb_fwd_config_setup(void) fs->peer_addr = fs->tx_port; fs->retry_enabled = retry_enabled; } - fwd_lcores[lc_id]->stream_nb += - rxp_dcb_info.tc_queue.tc_rxq[i][tc].nb_queue; + sub_core_idx++; + fwd_lcores[lc_id]->stream_nb += nb_rx_queue; } sm_id = (streamid_t) (sm_id + fwd_lcores[lc_id]->stream_nb); + if (sub_core_idx < dcb_fwd_tc_cores) + continue; + sub_core_idx = 0; tc++; if (tc < rxp_dcb_info.nb_tcs) continue; diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c index 04a4727b882..1fe41d852a4 100644 --- a/app/test-pmd/testpmd.c +++ b/app/test-pmd/testpmd.c @@ -216,6 +216,15 @@ struct fwd_engine * fwd_engines[] = { * If bit-n in tc-mask is 1, then TC-n's forwarding is enabled, and vice versa. */ uint8_t dcb_fwd_tc_mask = DEFAULT_DCB_FWD_TC_MASK; +/* + * Poll cores per TC when DCB forwarding. + * E.g. 1 indicates that one core process all queues of a TC. + * 2 indicates that two cores process all queues of a TC. If there + * is a TC with 8 queues, then [0, 3] belong to first core, and + * [4, 7] belong to second core. + * ... 
+ */ +uint8_t dcb_fwd_tc_cores = 1; struct rte_mempool *mempools[RTE_MAX_NUMA_NODES * MAX_SEGS_BUFFER_SPLIT]; uint16_t mempool_flags; diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h index 1ada0de450d..492b5757f11 100644 --- a/app/test-pmd/testpmd.h +++ b/app/test-pmd/testpmd.h @@ -486,6 +486,7 @@ extern cmdline_parse_inst_t cmd_set_flex_spec_pattern; #define DEFAULT_DCB_FWD_TC_MASK 0xFF extern uint8_t dcb_fwd_tc_mask; +extern uint8_t dcb_fwd_tc_cores; extern uint16_t mempool_flags; diff --git a/doc/guides/testpmd_app_ug/testpmd_funcs.rst b/doc/guides/testpmd_app_ug/testpmd_funcs.rst index 6c4925a868b..62bb167d56c 100644 --- a/doc/guides/testpmd_app_ug/testpmd_funcs.rst +++ b/doc/guides/testpmd_app_ug/testpmd_funcs.rst @@ -1495,6 +1495,14 @@ forwarding is enabled, and vice versa:: testpmd> set dcb fwd_tc (tc_mask) +set dcb fwd_tc_cores +~~~~~~~~~~~~~~~~~~~~ + +Config DCB forwarding cores per-TC, 1-means one core process all queues of a TC, +2-means two cores process all queues of a TC, and so on:: + + testpmd> set dcb fwd_tc_cores (tc_cores) + Set Rx queue available descriptors threshold ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From dec46366290076afa65bc6b505b274e10e90f494 Mon Sep 17 00:00:00 2001 From: Venkat Kumar Ande Date: Thu, 13 Nov 2025 22:11:09 +0530 Subject: [PATCH 05/99] net/axgbe: fix build with GCC 16 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Build warning reported for local variable set and not used on GCC-16. Fix the issue by using the variable for missed register write operation. ../drivers/net/axgbe/axgbe_ethdev.c: In function ‘axgbe_timesync_disable’: ../drivers/net/axgbe/axgbe_ethdev.c:1744:22: warning: variable ‘mac_tscr’ set but not used [-Wunused-but-set-variable=] 1744 | unsigned int mac_tscr = 0; | ^~~~~~~~ Bugzilla ID: 1819 Fixes: e04449488fdb ("net/axgbe: support IEEE 1588 PTP") Cc: stable@dpdk.org Signed-off-by: Venkat Kumar Ande --- drivers/net/axgbe/axgbe_ethdev.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/axgbe/axgbe_ethdev.c b/drivers/net/axgbe/axgbe_ethdev.c index 31d35ff182e..cf3b0d9ef54 100644 --- a/drivers/net/axgbe/axgbe_ethdev.c +++ b/drivers/net/axgbe/axgbe_ethdev.c @@ -1742,6 +1742,7 @@ axgbe_timesync_disable(struct rte_eth_dev *dev) { struct axgbe_port *pdata = dev->data->dev_private; unsigned int mac_tscr = 0; + unsigned int value = 0; /*disable timestamp for all pkts*/ AXGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSENALL, 0); @@ -1751,6 +1752,11 @@ axgbe_timesync_disable(struct rte_eth_dev *dev) AXGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSCFUPDT, 0); /*disable time stamp*/ AXGMAC_SET_BITS(mac_tscr, MAC_TSCR, TSENA, 0); + + value = AXGMAC_IOREAD(pdata, MAC_TSCR); + value |= mac_tscr; + AXGMAC_IOWRITE(pdata, MAC_TSCR, value); + return 0; } From 3b8225353ea3b9eb03df5dee94a037582d0d6748 Mon Sep 17 00:00:00 2001 From: Hemant Agrawal Date: Fri, 14 Nov 2025 11:54:51 +0530 Subject: [PATCH 06/99] net/dpaa2: fix duplicate call of close MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When rte_eth_dev_close() is called, it performs the following actions: Calls dev->dev_ops->dev_close(), which in this case is dpaa2_dev_close(). Then calls rte_eth_dev_release_port(), which releases all device data and sets dev->data to NULL. Later, when rte_dev_remove() is called, the FSLMC bus invokes dev->remove() — that is, rte_dpaa2_remove(). However, rte_dpaa2_remove() calls dpaa2_dev_close() again. 
Since dev->data was already set to NULL by the previous call, this second invocation causes a crash. Fixes: 5964d36a2904 ("net/dpaa2: release port upon close") Cc: stable@dpdk.org Signed-off-by: Hemant Agrawal Tested-by: Maxime Leroy --- drivers/net/dpaa2/dpaa2_ethdev.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/net/dpaa2/dpaa2_ethdev.c b/drivers/net/dpaa2/dpaa2_ethdev.c index 7da32ce8562..fcda267e0b3 100644 --- a/drivers/net/dpaa2/dpaa2_ethdev.c +++ b/drivers/net/dpaa2/dpaa2_ethdev.c @@ -3347,14 +3347,22 @@ static int rte_dpaa2_remove(struct rte_dpaa2_device *dpaa2_dev) { struct rte_eth_dev *eth_dev; - int ret; + int ret = 0; + + eth_dev = rte_eth_dev_allocated(dpaa2_dev->device.name); + if (eth_dev) { + ret = dpaa2_dev_close(eth_dev); + if (ret) + DPAA2_PMD_ERR("dpaa2_dev_close ret= %d", ret); + + ret = rte_eth_dev_release_port(eth_dev); + } - eth_dev = dpaa2_dev->eth_dev; - dpaa2_dev_close(eth_dev); dpaa2_valid_dev--; - if (!dpaa2_valid_dev) + if (!dpaa2_valid_dev) { rte_mempool_free(dpaa2_tx_sg_pool); - ret = rte_eth_dev_release_port(eth_dev); + dpaa2_tx_sg_pool = NULL; + } return ret; } From 360a8d674c36e628523bdd83bedd27ce34b3091b Mon Sep 17 00:00:00 2001 From: Maxime Leroy Date: Fri, 14 Nov 2025 11:54:52 +0530 Subject: [PATCH 07/99] net/dpaa2: clear active VDQ state when freeing Rx queues When using the prefetch Rx path (dpaa2_dev_prefetch_rx), the driver keeps track of one outstanding VDQCR command per DPIO portal in the global rte_global_active_dqs_list[] array. Each queue_storage_info_t also stores the active result buffer and portal index: qs->active_dqs qs->active_dpio_id Before issuing a new pull command, dpaa2_dev_prefetch_rx() checks for an active entry and spins on qbman_check_command_complete() until the corresponding VDQCR completes. On port close / hotplug remove, dpaa2_free_rx_tx_queues() frees all per-lcore queue_storage_info_t structures and their dq_storage[] buffers, but never clears the global rte_global_active_dqs_list[] entries. After a detach/attach sequence (or "del/add" in grout), the prefetch Rx path still sees an active entry for the portal and spins forever on a stale dq buffer that has been freed and will never be completed by hardware. In gdb, dq->dq.tok stays 0 and dpaa2_dev_prefetch_rx() loops in: while (!qbman_check_command_complete(get_swp_active_dqs(idx))) ; Fix this by clearing the active VDQ state before freeing queue storage. For each Rx queue and lcore, if qs->active_dqs is non-NULL, call clear_swp_active_dqs(qs->active_dpio_id) and set qs->active_dqs to NULL. Then dpaa2_queue_storage_free() can safely free q_storage and dq_storage[]. After this change, a DPNI detach/attach sequence no longer leaves stale entries in rte_global_active_dqs_list[], and the prefetch Rx loop does not hang waiting for a completion from a previous device instance. Reproduction: - grout: grcli interface add port dpni.1 devargs fslmc:dpni.1 grcli interface del dpni.1 grcli interface add port dpni.1 devargs fslmc:dpni.1 -> Rx was stuck in qbman_check_command_complete(), now works. 
- testpmd:
  dpdk-testpmd -n1 -a fslmc:dpni.65535 -- -i --forward-mode=rxonly
  testpmd> port attach fslmc:dpni.1
  testpmd> port start all
  testpmd> start
  testpmd> stop
  testpmd> port stop all
  testpmd> port detach 0
  testpmd> port attach fslmc:dpni.1
  testpmd> port start all
  testpmd> start
  -> Rx was hanging, now runs normally

Fixes: 12d98eceb8ac ("bus/fslmc: enhance QBMAN DQ storage logic")
Cc: stable@dpdk.org

Signed-off-by: Maxime Leroy
Acked-by: Hemant Agrawal
---
 .mailmap                         |  2 +-
 drivers/net/dpaa2/dpaa2_ethdev.c | 23 +++++++++++++++++++++++
 2 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/.mailmap b/.mailmap
index 50a59a596af..2678c4a9ded 100644
--- a/.mailmap
+++ b/.mailmap
@@ -1037,7 +1037,7 @@ Mauricio Vasquez B
 Maxime Coquelin
 Maxime Gouin
-Maxime Leroy
+Maxime Leroy
 Md Fahad Iqbal Polash
 Megha Ajmera
 Meijuan Zhao
diff --git a/drivers/net/dpaa2/dpaa2_ethdev.c b/drivers/net/dpaa2/dpaa2_ethdev.c
index fcda267e0b3..f2b24fc9530 100644
--- a/drivers/net/dpaa2/dpaa2_ethdev.c
+++ b/drivers/net/dpaa2/dpaa2_ethdev.c
@@ -631,6 +631,27 @@ dpaa2_alloc_rx_tx_queues(struct rte_eth_dev *dev)
 	return ret;
 }
 
+static void
+dpaa2_clear_queue_active_dps(struct dpaa2_queue *q, int num_lcores)
+{
+	int i;
+
+	for (i = 0; i < num_lcores; i++) {
+		struct queue_storage_info_t *qs = q->q_storage[i];
+
+		if (!qs)
+			continue;
+
+		if (qs->active_dqs) {
+			while (!qbman_check_command_complete(qs->active_dqs))
+				continue; /* wait */
+
+			clear_swp_active_dqs(qs->active_dpio_id);
+			qs->active_dqs = NULL;
+		}
+	}
+}
+
 static void
 dpaa2_free_rx_tx_queues(struct rte_eth_dev *dev)
 {
@@ -645,6 +666,8 @@ dpaa2_free_rx_tx_queues(struct rte_eth_dev *dev)
 	/* cleaning up queue storage */
 	for (i = 0; i < priv->nb_rx_queues; i++) {
 		dpaa2_q = priv->rx_vq[i];
+		dpaa2_clear_queue_active_dps(dpaa2_q,
+			RTE_MAX_LCORE);
 		dpaa2_queue_storage_free(dpaa2_q,
 			RTE_MAX_LCORE);
 	}

From 46d02eeaaeb8bf93b69a72bd917b119f320c0cf1 Mon Sep 17 00:00:00 2001
From: Hemant Agrawal
Date: Fri, 14 Nov 2025 11:54:53 +0530
Subject: [PATCH 08/99] net/dpaa2: fix queue freeing

The Rx error queue was not being freed.
Also, set the freed queue pointers to NULL.
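
To illustrate why the freed pointers must also be cleared, here is a
minimal sketch (illustrative names, not the driver's actual code):

```c
#include <stdlib.h>

struct ctx { void *rx_err_q; };

/* Without the NULL assignment, a second cleanup pass (e.g. close being
 * reached from both the dev_close and remove paths) would hand a
 * dangling pointer to free().
 */
static void cleanup(struct ctx *c)
{
	free(c->rx_err_q);
	c->rx_err_q = NULL; /* free(NULL) is a safe no-op on a later pass */
}
```
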
Fixes: 407ce3e5384b ("net/dpaa2: replace global variables with flags") Signed-off-by: Hemant Agrawal Tested-by: Maxime Leroy --- drivers/net/dpaa2/dpaa2_ethdev.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/dpaa2/dpaa2_ethdev.c b/drivers/net/dpaa2/dpaa2_ethdev.c index f2b24fc9530..1dd4a1e32b0 100644 --- a/drivers/net/dpaa2/dpaa2_ethdev.c +++ b/drivers/net/dpaa2/dpaa2_ethdev.c @@ -670,11 +670,13 @@ dpaa2_free_rx_tx_queues(struct rte_eth_dev *dev) RTE_MAX_LCORE); dpaa2_queue_storage_free(dpaa2_q, RTE_MAX_LCORE); + priv->rx_vq[i] = NULL; } /* cleanup tx queue cscn */ for (i = 0; i < priv->nb_tx_queues; i++) { dpaa2_q = priv->tx_vq[i]; rte_free(dpaa2_q->cscn); + priv->tx_vq[i] = NULL; } if (priv->flags & DPAA2_TX_CONF_ENABLE) { /* cleanup tx conf queue storage */ @@ -682,8 +684,14 @@ dpaa2_free_rx_tx_queues(struct rte_eth_dev *dev) dpaa2_q = priv->tx_conf_vq[i]; dpaa2_queue_storage_free(dpaa2_q, RTE_MAX_LCORE); + priv->tx_conf_vq[i] = NULL; } } + if (priv->flags & DPAAX_RX_ERROR_QUEUE_FLAG) { + dpaa2_q = priv->rx_err_vq; + dpaa2_queue_storage_free(dpaa2_q, RTE_MAX_LCORE); + } + /*free memory for all queues (RX+TX) */ rte_free(priv->rx_vq[0]); priv->rx_vq[0] = NULL; From b5721f271cbf20d38e8ebc10b9444d0d2512b67a Mon Sep 17 00:00:00 2001 From: Hemant Agrawal Date: Fri, 14 Nov 2025 11:54:54 +0530 Subject: [PATCH 09/99] bus/fslmc: support DPNI hotplug This patch implements the plug and unplug function to support attach/detach of dpni interfaces. Signed-off-by: Hemant Agrawal Tested-by: Maxime Leroy --- drivers/bus/fslmc/fslmc_bus.c | 56 +++++++++++++++++++++++++++++++---- 1 file changed, 50 insertions(+), 6 deletions(-) diff --git a/drivers/bus/fslmc/fslmc_bus.c b/drivers/bus/fslmc/fslmc_bus.c index 49c61c9d2df..4529ec5085e 100644 --- a/drivers/bus/fslmc/fslmc_bus.c +++ b/drivers/bus/fslmc/fslmc_bus.c @@ -589,17 +589,61 @@ rte_dpaa2_get_iommu_class(void) } static int -fslmc_bus_plug(struct rte_device *dev __rte_unused) +fslmc_bus_plug(struct rte_device *rte_dev) { - /* No operation is performed while plugging the device */ - return 0; + int ret = 0; + struct rte_dpaa2_device *dev = container_of(rte_dev, + struct rte_dpaa2_device, device); + struct rte_dpaa2_driver *drv; + + TAILQ_FOREACH(drv, &rte_fslmc_bus.driver_list, next) { + ret = rte_fslmc_match(drv, dev); + if (ret) + continue; + + if (!drv->probe) + continue; + + if (rte_dev_is_probed(&dev->device)) + continue; + + if (dev->device.devargs && + dev->device.devargs->policy == RTE_DEV_BLOCKED) { + DPAA2_BUS_DEBUG("%s Blocked, skipping", + dev->device.name); + continue; + } + + ret = drv->probe(drv, dev); + if (ret) { + DPAA2_BUS_ERR("Unable to probe"); + } else { + dev->driver = drv; + dev->device.driver = &drv->driver; + DPAA2_BUS_INFO("%s Plugged", dev->device.name); + } + break; + } + + return ret; } static int -fslmc_bus_unplug(struct rte_device *dev __rte_unused) +fslmc_bus_unplug(struct rte_device *rte_dev) { - /* No operation is performed while unplugging the device */ - return 0; + struct rte_dpaa2_device *dev = container_of(rte_dev, + struct rte_dpaa2_device, device); + struct rte_dpaa2_driver *drv = dev->driver; + + if (drv && drv->remove) { + drv->remove(dev); + dev->driver = NULL; + dev->device.driver = NULL; + DPAA2_BUS_INFO("%s Un-Plugged", dev->device.name); + return 0; + } + + return -ENODEV; } static void * From 39454e245b125ae555bc5a3a058cc3c1e1280f82 Mon Sep 17 00:00:00 2001 From: Maayan Kashani Date: Sun, 16 Nov 2025 14:14:37 +0200 Subject: [PATCH 10/99] app/testpmd: fix flex item link parsing The 
flex_link_item_parse function was using FLEX_MAX_FLOW_PATTERN_LENGTH for all memcpy operations regardless of the actual flow item type. This could lead to copying incorrect amounts of data. This patch adds a switch statement to determine the correct size based on the actual flow item type (IPv4, IPv6, UDP, TCP) and uses that size for the memcpy operations on spec, mask, and last fields. Also adds validation to reject unsupported item types. Fixes: 59f3a8acbcdb ("app/testpmd: add flex item commands") Cc: stable@dpdk.org Signed-off-by: Maayan Kashani --- app/test-pmd/cmd_flex_item.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/app/test-pmd/cmd_flex_item.c b/app/test-pmd/cmd_flex_item.c index e6e1cefeb3d..af6c087feba 100644 --- a/app/test-pmd/cmd_flex_item.c +++ b/app/test-pmd/cmd_flex_item.c @@ -143,21 +143,22 @@ flex_link_item_parse(const char *src, struct rte_flow_item *item) if (ret) return ret; item->type = pattern->type; - if (pattern->spec) { + ret = rte_flow_conv(RTE_FLOW_CONV_OP_ITEM_MASK, NULL, 0, item, NULL); + if ((ret > 0) && pattern->spec) { ptr = (void *)(uintptr_t)item->spec; - memcpy(ptr, pattern->spec, FLEX_MAX_FLOW_PATTERN_LENGTH); + memcpy(ptr, pattern->spec, ret); } else { item->spec = NULL; } - if (pattern->mask) { + if ((ret > 0) && pattern->mask) { ptr = (void *)(uintptr_t)item->mask; - memcpy(ptr, pattern->mask, FLEX_MAX_FLOW_PATTERN_LENGTH); + memcpy(ptr, pattern->mask, ret); } else { item->mask = NULL; } - if (pattern->last) { + if ((ret > 0) && pattern->last) { ptr = (void *)(uintptr_t)item->last; - memcpy(ptr, pattern->last, FLEX_MAX_FLOW_PATTERN_LENGTH); + memcpy(ptr, pattern->last, ret); } else { item->last = NULL; } From 82ff0aa59735fefa6e9e9daf77ea87da5b68fabd Mon Sep 17 00:00:00 2001 From: Andrzej Wilczynski Date: Mon, 10 Nov 2025 13:55:39 +0000 Subject: [PATCH 11/99] net/ixgbe/base: fix PF link state request size Currently, when requesting PF link state over VF mailbox, the buffer sized 3 dwords is allocated. However, the `ixgbevf_write_msg_read_ack` function is actually called with buffer size of 6 dwords. This leaves an admittedly remote possibility of buffer overrun. Fix by adjusting requested size to 3. Bugzilla ID: 1801 Fixes: adbd71030575 ("net/ixgbe/base: fix link status for E610") Cc: stable@dpdk.org Signed-off-by: Andrzej Wilczynski Signed-off-by: Anatoly Burakov Acked-by: Bruce Richardson --- drivers/net/intel/ixgbe/base/ixgbe_vf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/intel/ixgbe/base/ixgbe_vf.c b/drivers/net/intel/ixgbe/base/ixgbe_vf.c index 46a62bb8518..eb16fb19421 100644 --- a/drivers/net/intel/ixgbe/base/ixgbe_vf.c +++ b/drivers/net/intel/ixgbe/base/ixgbe_vf.c @@ -482,7 +482,7 @@ int ixgbevf_get_pf_link_state(struct ixgbe_hw *hw, ixgbe_link_speed *speed, msgbuf[0] = IXGBE_VF_GET_PF_LINK_STATE; - err = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, 6); + err = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, 3); if (err || (msgbuf[0] & IXGBE_VT_MSGTYPE_FAILURE)) { err = IXGBE_ERR_MBX; *speed = IXGBE_LINK_SPEED_UNKNOWN; From 61ccab85e3972d6e3ee61b3e6a6a6872a33e5ac3 Mon Sep 17 00:00:00 2001 From: Bruce Richardson Date: Wed, 12 Nov 2025 11:57:26 +0000 Subject: [PATCH 12/99] net/ice: fix path selection for QinQ Tx offload The capabilities flag for the vector offload path include the QinQ offload capability, but in fact the offload path lacks any ability to create context descriptors. 
This means that it cannot insert multiple vlan tags for QinQ support, so move the offload from the VECTOR_OFFLOAD list to the NO_VECTOR list. Similarly, remove any check for the QinQ mbuf flag in any packets being transmitted, since that offload is invalid to request if the feature is not enabled. Fixes: 808a17b3c1e6 ("net/ice: add Rx AVX512 offload path") Cc: stable@dpdk.org Signed-off-by: Bruce Richardson Acked-by: Ciara Loftus --- drivers/net/intel/ice/ice_rxtx_vec_common.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/intel/ice/ice_rxtx_vec_common.h b/drivers/net/intel/ice/ice_rxtx_vec_common.h index a24694c0b18..39581cb7ae6 100644 --- a/drivers/net/intel/ice/ice_rxtx_vec_common.h +++ b/drivers/net/intel/ice/ice_rxtx_vec_common.h @@ -53,6 +53,7 @@ _ice_rx_queue_release_mbufs_vec(struct ci_rx_queue *rxq) #define ICE_TX_NO_VECTOR_FLAGS ( \ RTE_ETH_TX_OFFLOAD_MULTI_SEGS | \ + RTE_ETH_TX_OFFLOAD_QINQ_INSERT | \ RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM | \ RTE_ETH_TX_OFFLOAD_TCP_TSO | \ RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO | \ @@ -64,7 +65,6 @@ _ice_rx_queue_release_mbufs_vec(struct ci_rx_queue *rxq) #define ICE_TX_VECTOR_OFFLOAD ( \ RTE_ETH_TX_OFFLOAD_VLAN_INSERT | \ - RTE_ETH_TX_OFFLOAD_QINQ_INSERT | \ RTE_ETH_TX_OFFLOAD_IPV4_CKSUM | \ RTE_ETH_TX_OFFLOAD_SCTP_CKSUM | \ RTE_ETH_TX_OFFLOAD_UDP_CKSUM | \ @@ -195,8 +195,8 @@ ice_txd_enable_offload(struct rte_mbuf *tx_pkt, *txd_hi |= ((uint64_t)td_offset) << ICE_TXD_QW1_OFFSET_S; - /* Tx VLAN/QINQ insertion Offload */ - if (ol_flags & (RTE_MBUF_F_TX_VLAN | RTE_MBUF_F_TX_QINQ)) { + /* Tx VLAN insertion Offload */ + if (ol_flags & RTE_MBUF_F_TX_VLAN) { td_cmd |= ICE_TX_DESC_CMD_IL2TAG1; *txd_hi |= ((uint64_t)tx_pkt->vlan_tci << ICE_TXD_QW1_L2TAG1_S); From 8724a85b9a89ceb84371c3fda8156a8ec47602e3 Mon Sep 17 00:00:00 2001 From: Ciara Loftus Date: Wed, 12 Nov 2025 15:11:23 +0000 Subject: [PATCH 13/99] net/intel: ensure correct Rx path is selected The common Rx path selection logic iterates through an array of candidate paths and selects the best fit for the requested features. Currently, in the event that two potential candidates are identified, the one with the fewer offloads (and thus less complex path) is selected. However this is not correct, because if the path with more offloads has a greater SIMD width, that should be chosen. This commit reworks the logic so that the number of offloads is only taken into consideration when choosing between two paths with the same SIMD width. Since the paths arrays are ordered from lowest SIMD width to highest, and vector paths tend to have fewer offloads enabled than scalar paths, "new" candidate paths with greater SIMDs widths tended to have fewer or equal offloads than the "current" candidate paths and thus were correctly accepted as the best candidate. For this reason the incorrect logic did not cause any incorrect path selections in practise. Fixes: 9d99641d80a0 ("net/intel: introduce infrastructure for Rx path selection") Signed-off-by: Ciara Loftus Acked-by: Bruce Richardson --- drivers/net/intel/common/rx.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/intel/common/rx.h b/drivers/net/intel/common/rx.h index 5012e4fced0..9fa3cdc64de 100644 --- a/drivers/net/intel/common/rx.h +++ b/drivers/net/intel/common/rx.h @@ -300,8 +300,11 @@ ci_rx_path_select(struct ci_rx_path_features req_features, /* Do not select paths with lower SIMD width than the current path. 
*/
 		if (path_features->simd_width < current_features->simd_width)
 			continue;
-		/* Do not select paths with more offloads enabled than the current path. */
-		if (rte_popcount32(path_features->rx_offloads) >
+		/* Do not select paths with more offloads enabled than the current path if
+		 * the SIMD widths are the same.
+		 */
+		if (path_features->simd_width == current_features->simd_width &&
+			rte_popcount32(path_features->rx_offloads) >
 				rte_popcount32(current_features->rx_offloads))
 			continue;
 		/* Do not select paths without bulk alloc support if requested and the

From b060d31577519a7b9c3fe3a151f375fad96f218b Mon Sep 17 00:00:00 2001
From: Anatoly Burakov
Date: Wed, 12 Nov 2025 11:34:12 +0000
Subject: [PATCH 14/99] doc: explain protocol agnostic filtering in ice guide

Current documentation for protocol agnostic filtering in the ICE driver
is a bit terse and relies on a lot of assumed knowledge. Document the
feature better and make all of the assumptions explicit.

Signed-off-by: Anatoly Burakov
Acked-by: Vladimir Medvedkin
---
 doc/guides/nics/ice.rst | 264 ++++++++++++++++++++++++++++++++++++++--
 1 file changed, 253 insertions(+), 11 deletions(-)

diff --git a/doc/guides/nics/ice.rst b/doc/guides/nics/ice.rst
index 6cc27cefa7c..2d555d2943f 100644
--- a/doc/guides/nics/ice.rst
+++ b/doc/guides/nics/ice.rst
@@ -624,20 +624,262 @@ For each engine, a list of supported patterns is maintained in a global array
 named ``ice_<engine>_supported_pattern``. The Ice PMD will reject any rule with a
 pattern that is not included in the supported list.
 
-One notable feature is the ice PMD's ability to leverage the Raw pattern,
-enabling protocol-agnostic flow offloading. Here is an example of creating
-a rule that matches an IPv4 destination address of 1.2.3.4 and redirects it to
-queue 3 using a raw pattern::
-
-   flow create 0 ingress group 2 pattern raw \
-   pattern spec \
-   00000000000000000000000008004500001400004000401000000000000001020304 \
-   pattern mask \
-   000000000000000000000000000000000000000000000000000000000000ffffffff \
-   end actions queue index 3 / mark id 3 / end
+Protocol Agnostic Filtering
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+One notable feature is the ice PMD's ability to leverage the raw pattern,
+enabling protocol-agnostic flow offloading.
+This feature allows users to create flow rules for any protocol recognized by the hardware parser,
+by manually specifying the raw packet structure.
+Therefore, flow offloading can be used
+even in cases where the desired protocol isn't explicitly supported by the flow API.
+
+Raw Pattern Components
+++++++++++++++++++++++
+
+Raw patterns consist of two key components:
+
+**Pattern Spec**
+   An ASCII hexadecimal string representing the complete packet structure
+   that defines the packet type and protocol layout.
+   The hardware parser analyzes this structure to determine the packet type (PTYPE)
+   and identify protocol headers and their offsets.
+   This specification must represent a valid packet structure
+   that the hardware can parse and classify.
+   If the hardware parser does not support a particular protocol stack,
+   it may not correctly identify the packet type.
+
+**Pattern Mask**
+   An ASCII hexadecimal string of the same length as the spec
+   that determines which specific fields within the packet will be extracted and used for matching.
+   The mask controls field extraction without affecting the packet type identification.
+
+.. note::
+
+   Raw pattern must be the only flow item in the flow item list.
+ +Generating Raw Pattern Values ++++++++++++++++++++++++++++++ + +To create raw patterns, follow these steps: + +#. **Verify parser support**: + Confirm that the hardware parser supports the protocol combination + needed for the intended flow rule. + This can be checked against the documentation for the DDP package currently in use. + +#. **Build the packet template**: + Create a complete, valid packet header + with all necessary sections (Ethernet, IP, UDP/TCP, etc.) + using the exact field values that need to be matched. + +#. **Convert to hexadecimal**: + Transform the entire header into a continuous ASCII hexadecimal string, + with each byte represented as two hex characters. + +#. **Create the extraction mask**: + Generate a mask of the same length as the spec, + where set bits would indicate the fields used for extraction/matching. + +VPP project's `flow_parse.py` script can be used +to generate packet templates and masks for raw patterns. +This tool takes a human-readable flow description +and outputs the corresponding ASCII hexadecimal spec and mask. +This script can be found under ``extras/packetforge`` +in `VPP project `_. + +Example usage: + +.. code-block:: console + + python3 flow_parse.py --show -p "mac()/ipv4(src=1.1.1.1,dst=2.2.2.2)/udp()" + +Output: + +.. code-block:: console + + {'flow': {'generic': {'pattern': {'spec': b'00000000000100000000000208004500001c000000000011000001010101020202020000000000080000', + 'mask': b'0000000000000000000000000000000000000000000000000000ffffffffffffffff0000000000000000'}}}} + +.. note:: + + Ensure the spec represents complete protocol headers, + as the hardware parser processes fields at 16-bit boundaries. + Incomplete or truncated headers may result in unpredictable field extraction behavior. + +Action Support and Usage +^^^^^^^^^^^^^^^^^^^^^^^^ + +After constructing the raw pattern spec and mask, +they can be used in the flow API with pattern type "raw". + +The following is an example of a minimal Ethernet + IPv4 header template. +Source and destination IPv4 addresses are part of the match key; all other fields are ignored. + +Spec (packet template): + +.. code-block:: + + 000000000001 Destination MAC (6 bytes) + 000000000002 Source MAC (6 bytes) + 0800 EtherType = IPv4 + 4500001c0000000000110000 IPv4 header, protocol = UDP + 01010101 Source IP = 1.1.1.1 + 02020202 Destination IP = 2.2.2.2 + 0000000000080000 UDP header + +Mask: + +.. code-block:: + + 000000000000 Destination MAC (ignored) + 000000000000 Source MAC (ignored) + 0000 EtherType (ignored) + 000000000000000000000000 IPv4/UDP header (ignored) + ffffffff Source IP (match all 32 bits) + ffffffff Destination IP (match all 32 bits) + 0000000000000000 UDP header (ignored) + +This spec will match any non-fragmented IPv4/UDP packet +whose source IP is 1.1.1.1 and destination IP is 2.2.2.2. + +Currently, the following actions are supported: + +- **mark**: + Attaches a user-defined integer value to matching packets. + Can be specified together with another action. + +- **queue**: + Directs matching packets to a specific receive queue. + +- **drop**: + Discards matching packets at the hardware level. + +- **rss**: + Enables Receive Side Scaling (RSS) for matching packets. + +Constraints: + * For RSS, only the global configuration is used; + per-rule queue lists or RSS keys are not supported. + +To direct matching packets to a specific queue, and set mbuf FDIR metadata in: + +.. 
code-block:: console
+
+   flow create 0 ingress pattern raw \
+   pattern spec 00000000000100000000000208004500001c000000000011000001010101020202020000000000080000 \
+   pattern mask 0000000000000000000000000000000000000000000000000000ffffffffffffffff0000000000000000 / end \
+   actions queue index 3 / mark id 3 / end
+
+Equivalent C code using the flow API:
+
+.. code-block:: c
+
+   /* Hex string for the packet spec (Ethernet + IPv4 + UDP header) */
+   static const uint8_t raw_pattern_spec[] = {
+       0x00, 0x00, 0x00, 0x00, 0x00, 0x01, /* Destination MAC */
+       0x00, 0x00, 0x00, 0x00, 0x00, 0x02, /* Source MAC */
+       0x08, 0x00, /* EtherType: IPv4 */
+       0x45, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x00,
+       0x00, 0x11, 0x00, 0x00, /* IPv4 header, protocol UDP */
+       0x01, 0x01, 0x01, 0x01, /* Source IP: 1.1.1.1 */
+       0x02, 0x02, 0x02, 0x02, /* Destination IP: 2.2.2.2 */
+       0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00 /* UDP header */
+   };
+
+   /* Mask indicating which fields to match (source and destination IPs) */
+   static const uint8_t raw_pattern_mask[] = {
+       0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* MAC addresses - ignored */
+       0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+       0x00, 0x00, /* EtherType - ignored */
+       0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+       0x00, 0x00, 0x00, 0x00, /* IPv4/UDP headers - ignored */
+       0xff, 0xff, 0xff, 0xff, /* Source IP - match all 32 bits */
+       0xff, 0xff, 0xff, 0xff, /* Destination IP - match all 32 bits */
+       0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 /* UDP - ignored */
+   };
+
+   struct rte_flow_item_raw raw_spec = {
+       .length = sizeof(raw_pattern_spec),
+       .pattern = raw_pattern_spec,
+   };
+
+   struct rte_flow_item_raw raw_mask = {
+       .length = sizeof(raw_pattern_mask),
+       .pattern = raw_pattern_mask,
+   };
+
+   struct rte_flow_attr attr = {
+       .ingress = 1,
+   };
+
+   struct rte_flow_item pattern[] = {
+       {
+           .type = RTE_FLOW_ITEM_TYPE_RAW,
+           .spec = &raw_spec,
+           .mask = &raw_mask,
+       },
+       {
+           .type = RTE_FLOW_ITEM_TYPE_END,
+       },
+   };
+
+   struct rte_flow_action actions[] = {
+       /* direct flow to queue index 3 */
+       {
+           .type = RTE_FLOW_ACTION_TYPE_QUEUE,
+           .conf = &(struct rte_flow_action_queue){ .index = 3 },
+       },
+       /* write id into mbuf FDIR metadata */
+       {
+           .type = RTE_FLOW_ACTION_TYPE_MARK,
+           .conf = &(struct rte_flow_action_mark){ .id = 3 },
+       },
+       {
+           .type = RTE_FLOW_ACTION_TYPE_END,
+       },
+   };
+
+   struct rte_flow_error error;
+   struct rte_flow *flow = rte_flow_create(port_id, &attr, pattern, actions, &error);
+
+To use masked bits (IPv4 source/destination addresses) to distribute such packets via RSS:
+
+.. code-block:: console
+
+   flow create 0 ingress pattern raw \
+   pattern spec 00000000000100000000000208004500001c000000000011000001010101020202020000000000080000 \
+   pattern mask 0000000000000000000000000000000000000000000000000000ffffffffffffffff0000000000000000 / end \
+   actions rss / end
+
+Equivalent C code using the flow API:
+
+.. code-block:: c
+
+   /* Use the same structures and code as above, only actions change */
+
+   struct rte_flow_action actions[] = {
+       {
+           .type = RTE_FLOW_ACTION_TYPE_RSS,
+           /* Use NULL conf for default RSS configuration */
+       },
+       {
+           .type = RTE_FLOW_ACTION_TYPE_END,
+       },
+   };
+
+**Limitations**
+
 Currently, raw pattern support is limited to the FDIR and Hash engines.
 
+.. note::
+
+   **DDP Package Dependency**:
+   Raw pattern functionality relies on the loaded DDP package
+   to define available packet types and protocol parsing rules.
+   Different DDP packages (OS Default, COMMS, Wireless)
+   may support different protocol combinations and PTYPE mappings.
+ Traffic Management Support ~~~~~~~~~~~~~~~~~~~~~~~~~~ From 9ac3d9cc61bc70fdc6dff14969a7111a673be932 Mon Sep 17 00:00:00 2001 From: Zhichao Zeng Date: Thu, 13 Nov 2025 15:47:10 +0800 Subject: [PATCH 15/99] net/ice: fix statistics The statistics contain 40 bits. The lower 32 bits are read first, followed by the upper 8 bits. In some cases, after reading the lower 32 bits, a carry occurs from the lower bits, which causes the final statistics to be incorrect. This commit fixes this issue. Fixes: a37bde56314d ("net/ice: support statistics") Cc: stable@dpdk.org Signed-off-by: Zhichao Zeng Acked-by: Bruce Richardson --- drivers/net/intel/ice/ice_ethdev.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/net/intel/ice/ice_ethdev.c b/drivers/net/intel/ice/ice_ethdev.c index c1d92435d15..c721d135f5c 100644 --- a/drivers/net/intel/ice/ice_ethdev.c +++ b/drivers/net/intel/ice/ice_ethdev.c @@ -6417,10 +6417,16 @@ ice_stat_update_40(struct ice_hw *hw, uint64_t *stat) { uint64_t new_data; + uint32_t lo_old, hi, lo; - new_data = (uint64_t)ICE_READ_REG(hw, loreg); - new_data |= (uint64_t)(ICE_READ_REG(hw, hireg) & ICE_8_BIT_MASK) << - ICE_32_BIT_WIDTH; + do { + lo_old = ICE_READ_REG(hw, loreg); + hi = ICE_READ_REG(hw, hireg); + lo = ICE_READ_REG(hw, loreg); + } while (lo_old > lo); + + new_data = (uint64_t)lo; + new_data |= (uint64_t)(hi & ICE_8_BIT_MASK) << ICE_32_BIT_WIDTH; if (!offset_loaded) *offset = new_data; From 743bbd3bd22561ace152403fb505b48e4620ac53 Mon Sep 17 00:00:00 2001 From: Sunil Kumar Kori Date: Wed, 12 Nov 2025 14:10:57 +0000 Subject: [PATCH 16/99] net/ice: fix Tx packet prepare As per recent change by the following commit: commit 066f3d9cc21c ("ethdev: remove callback checks from fast path") framework unconditionally invokes dev->tx_pkt_prepare. Due to this, ICE driver gets crashed as tx_pkt_prepare was set to NULL during initialization. Ensure dev->tx_pkt_prepare is not NULL when vector or simple Tx paths are selected, by assigning rte_eth_tx_pkt_prepare_dummy. This aligns with expectations with above mentioned commit. 
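
For reference, a minimal sketch of the assumed no-op prepare semantics
(this is not the ethdev implementation, just the contract the dummy
callback has to satisfy so the framework can call it unconditionally):

```c
#include <stdint.h>

struct rte_mbuf;

/* Accept the whole burst unchanged: nothing needs preparing on the
 * simple/vector Tx paths.
 */
static uint16_t
tx_pkt_prepare_dummy(void *txq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
	(void)txq;
	(void)tx_pkts;
	return nb_pkts;
}
```
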
Bugzilla ID: 1795 Fixes: 6eac0b7fde95 ("net/ice: support advance Rx/Tx") Cc: stable@dpdk.org Signed-off-by: Sunil Kumar Kori Tested-by: Hailin Xu --- drivers/net/intel/ice/ice_rxtx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/intel/ice/ice_rxtx.c b/drivers/net/intel/ice/ice_rxtx.c index 2673e885c36..74db0fbec92 100644 --- a/drivers/net/intel/ice/ice_rxtx.c +++ b/drivers/net/intel/ice/ice_rxtx.c @@ -4129,7 +4129,7 @@ ice_set_tx_function(struct rte_eth_dev *dev) } if (ad->tx_vec_allowed) { - dev->tx_pkt_prepare = NULL; + dev->tx_pkt_prepare = rte_eth_tx_pkt_prepare_dummy; if (ad->tx_simd_width == RTE_VECT_SIMD_512) { #ifdef CC_AVX512_SUPPORT if (tx_check_ret == ICE_VECTOR_OFFLOAD_PATH) { @@ -4175,7 +4175,7 @@ ice_set_tx_function(struct rte_eth_dev *dev) if (ad->tx_simple_allowed) { PMD_INIT_LOG(DEBUG, "Simple tx finally be used."); dev->tx_pkt_burst = ice_xmit_pkts_simple; - dev->tx_pkt_prepare = NULL; + dev->tx_pkt_prepare = rte_eth_tx_pkt_prepare_dummy; } else { PMD_INIT_LOG(DEBUG, "Normal tx finally be used."); dev->tx_pkt_burst = ice_xmit_pkts; From 19d7188f6fc3c147ac9c8a870ca16a22f61d4096 Mon Sep 17 00:00:00 2001 From: Bruce Richardson Date: Wed, 12 Nov 2025 14:10:58 +0000 Subject: [PATCH 17/99] net/ixgbe: fix Tx packet prepare As per recent change by the following commit: commit 066f3d9cc21c ("ethdev: remove callback checks from fast path") framework unconditionally invokes dev->tx_pkt_prepare. Ensure dev->tx_pkt_prepare is not NULL when vector or simple TX paths are selected, by assigning rte_eth_tx_pkt_prepare_dummy. This aligns with expectations with above mentioned commit. Fixes: 7829b8d52be0 ("net/ixgbe: add Tx preparation") Cc: stable@dpdk.org Signed-off-by: Bruce Richardson --- drivers/net/intel/ixgbe/ixgbe_rxtx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx.c b/drivers/net/intel/ixgbe/ixgbe_rxtx.c index 897ee2b671a..a7583c178a1 100644 --- a/drivers/net/intel/ixgbe/ixgbe_rxtx.c +++ b/drivers/net/intel/ixgbe/ixgbe_rxtx.c @@ -2653,7 +2653,7 @@ ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ci_tx_queue *txq) #endif (txq->tx_rs_thresh >= IXGBE_TX_MAX_BURST)) { PMD_INIT_LOG(DEBUG, "Using simple tx code path"); - dev->tx_pkt_prepare = NULL; + dev->tx_pkt_prepare = rte_eth_tx_pkt_prepare_dummy; if (txq->tx_rs_thresh <= IXGBE_TX_MAX_FREE_BUF_SZ && rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128 && (rte_eal_process_type() != RTE_PROC_PRIMARY || From bd96307d152da4baa1b14a6fcfa7700703179cfc Mon Sep 17 00:00:00 2001 From: Bruce Richardson Date: Wed, 12 Nov 2025 14:10:59 +0000 Subject: [PATCH 18/99] net/fm10k: fix Tx packet prepare As per recent change by the following commit: commit 066f3d9cc21c ("ethdev: remove callback checks from fast path") framework unconditionally invokes dev->tx_pkt_prepare. Ensure dev->tx_pkt_prepare is not NULL when vector or simple TX paths are selected, by assigning rte_eth_tx_pkt_prepare_dummy. This aligns with expectations with above mentioned commit. 
Fixes: 9b134aa39716 ("net/fm10k: add Tx preparation") Cc: stable@dpdk.org Signed-off-by: Bruce Richardson --- drivers/net/intel/fm10k/fm10k_ethdev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/intel/fm10k/fm10k_ethdev.c b/drivers/net/intel/fm10k/fm10k_ethdev.c index 82c0f1a1ad3..97f61afec22 100644 --- a/drivers/net/intel/fm10k/fm10k_ethdev.c +++ b/drivers/net/intel/fm10k/fm10k_ethdev.c @@ -2955,7 +2955,7 @@ fm10k_set_tx_function(struct rte_eth_dev *dev) } else { PMD_INIT_LOG(DEBUG, "Use vector Tx func"); dev->tx_pkt_burst = fm10k_xmit_pkts_vec; - dev->tx_pkt_prepare = NULL; + dev->tx_pkt_prepare = rte_eth_tx_pkt_prepare_dummy; } return; } @@ -2979,7 +2979,7 @@ fm10k_set_tx_function(struct rte_eth_dev *dev) fm10k_txq_vec_setup(txq); } dev->tx_pkt_burst = fm10k_xmit_pkts_vec; - dev->tx_pkt_prepare = NULL; + dev->tx_pkt_prepare = rte_eth_tx_pkt_prepare_dummy; } else { dev->tx_pkt_burst = fm10k_xmit_pkts; dev->tx_pkt_prepare = fm10k_prep_pkts; From 31f1e4ea4d60061da3c7a0c18bdea152b6e78f46 Mon Sep 17 00:00:00 2001 From: Ciara Loftus Date: Thu, 13 Nov 2025 10:30:12 +0000 Subject: [PATCH 19/99] net/intel: rename variable in Rx select The common Rx path selection function contains logic that compares two Rx paths: 1. the path selected as most suitable so far aka "current path" 2. a candidate path that has not yet been selected aka "path" This naming could cause confusion, as the candidate path could also be considered the "current path". To rectify this, rename "current path" to "chosen path". Signed-off-by: Ciara Loftus Acked-by: Bruce Richardson --- drivers/net/intel/common/rx.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/net/intel/common/rx.h b/drivers/net/intel/common/rx.h index 9fa3cdc64de..ea7fb98b478 100644 --- a/drivers/net/intel/common/rx.h +++ b/drivers/net/intel/common/rx.h @@ -261,7 +261,7 @@ ci_rx_path_select(struct ci_rx_path_features req_features, int default_path) { int i, idx = default_path; - const struct ci_rx_path_features *current_features = NULL; + const struct ci_rx_path_features *chosen_path_features = NULL; for (i = 0; i < num_paths; i++) { const struct ci_rx_path_features *path_features = &infos[i].features; @@ -295,29 +295,29 @@ ci_rx_path_select(struct ci_rx_path_features req_features, if (path_features->simd_width > req_features.simd_width) continue; - /* Do not select the path if it is less suitable than the current path. */ - if (current_features != NULL) { - /* Do not select paths with lower SIMD width than the current path. */ - if (path_features->simd_width < current_features->simd_width) + /* Do not select the path if it is less suitable than the chosen path. */ + if (chosen_path_features != NULL) { + /* Do not select paths with lower SIMD width than the chosen path. */ + if (path_features->simd_width < chosen_path_features->simd_width) continue; - /* Do not select paths with more offloads enabled than the current path if + /* Do not select paths with more offloads enabled than the chosen path if * the SIMD widths are the same. */ - if (path_features->simd_width == current_features->simd_width && + if (path_features->simd_width == chosen_path_features->simd_width && rte_popcount32(path_features->rx_offloads) > - rte_popcount32(current_features->rx_offloads)) + rte_popcount32(chosen_path_features->rx_offloads)) continue; /* Do not select paths without bulk alloc support if requested and the - * current path already meets this requirement. 
+ * chosen path already meets this requirement. */ if (!path_features->extra.bulk_alloc && req_features.extra.bulk_alloc && - current_features->extra.bulk_alloc) + chosen_path_features->extra.bulk_alloc) continue; } /* Finally, select the path since it has met all the requirements. */ idx = i; - current_features = &infos[idx].features; + chosen_path_features = &infos[idx].features; } return idx; From f36df6a25569102afa911b74d8613a5e7267f038 Mon Sep 17 00:00:00 2001 From: Ciara Loftus Date: Thu, 13 Nov 2025 14:50:21 +0000 Subject: [PATCH 20/99] net/idpf: fix queue setup with TSO offload The TCP_TSO Tx offload was missing from the conversion function. This was effectively taking the request for that offload out of consideration when selecting the Tx function. As a result a vector path which does not support TSO could be chosen when that offload was requested, when instead the scalar path should be chosen which does support that offload. Fix this by adding the TSO offload to the conversion function. Fixes: c008a5e740bd ("common/idpf: add queue setup/release") Cc: stable@dpdk.org Signed-off-by: Ciara Loftus Acked-by: Praveen Shetty --- drivers/net/intel/idpf/idpf_rxtx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/intel/idpf/idpf_rxtx.c b/drivers/net/intel/idpf/idpf_rxtx.c index 1c725065df0..4796d8b8620 100644 --- a/drivers/net/intel/idpf/idpf_rxtx.c +++ b/drivers/net/intel/idpf/idpf_rxtx.c @@ -42,6 +42,8 @@ idpf_tx_offload_convert(uint64_t offload) ol |= IDPF_TX_OFFLOAD_TCP_CKSUM; if ((offload & RTE_ETH_TX_OFFLOAD_SCTP_CKSUM) != 0) ol |= IDPF_TX_OFFLOAD_SCTP_CKSUM; + if ((offload & RTE_ETH_TX_OFFLOAD_TCP_TSO) != 0) + ol |= IDPF_TX_OFFLOAD_TCP_TSO; if ((offload & RTE_ETH_TX_OFFLOAD_MULTI_SEGS) != 0) ol |= IDPF_TX_OFFLOAD_MULTI_SEGS; if ((offload & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) != 0) From d21c2fe6e5a1ef1e7cc9490f54f359db1cfd5283 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 13 Nov 2025 13:33:44 -0800 Subject: [PATCH 21/99] net/iavf: fix check for PF Rx timestamp support The iavf driver has support for hardware Rx timestamps since commit b5cd735132f6 ("net/iavf: enable Rx timestamp on flex descriptor"). To enable this, the VF must first negotiate PTP capabilities with the PF by sending the VIRTCHNL_OP_1588_PTP_GET_CAPS command, with the requested capabilities. The PF will respond with the actually supported subset of capabilities. The PF may not actually enable Rx timestamping, even if it reports the overall PTP capability support. If this happens, the iavf driver logic will incorrectly report that Rx timestamps can be enabled despite being rejected by the PF. This is unlikely in practice, as most PFs which support the VIRTCHNL_VF_CAP_PTP will support Rx timestamping. However, there are some cases where this may not be true. Check that the PF actually reports the Rx timestamping capability instead of assuming it is enabled. Doing so prevents the DPDK application from attempting to enable Rx timestamps when they won't actually be enabled. 
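The two-level check this patch introduces can be summarised in isolation. A hedged sketch using the capability flag names from the diff (the surrounding driver structures are simplified away):

```
#include <stdbool.h>
#include <stdint.h>

#include "virtchnl.h" /* VIRTCHNL_* capability bits */

/* Rx timestamping is usable only if PTP was negotiated at all AND the
 * PF granted the Rx-timestamp sub-capability in its GET_CAPS reply.
 */
static bool
vf_rx_tstamp_usable(uint32_t vf_cap_flags, uint32_t granted_ptp_caps)
{
	if (!(vf_cap_flags & VIRTCHNL_VF_CAP_PTP))
		return false;
	return (granted_ptp_caps & VIRTCHNL_1588_PTP_CAP_RX_TSTAMP) != 0;
}
```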
Fixes: b5cd735132f6 ("net/iavf: enable Rx timestamp on flex descriptor") Cc: stable@dpdk.org Signed-off-by: Jacob Keller --- drivers/net/intel/iavf/iavf_ethdev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/intel/iavf/iavf_ethdev.c b/drivers/net/intel/iavf/iavf_ethdev.c index 15e49fe2481..3ef766de470 100644 --- a/drivers/net/intel/iavf/iavf_ethdev.c +++ b/drivers/net/intel/iavf/iavf_ethdev.c @@ -1177,7 +1177,8 @@ iavf_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_CRC) dev_info->rx_offload_capa |= RTE_ETH_RX_OFFLOAD_KEEP_CRC; - if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_CAP_PTP) + if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_CAP_PTP && + vf->ptp_caps & VIRTCHNL_1588_PTP_CAP_RX_TSTAMP) dev_info->rx_offload_capa |= RTE_ETH_RX_OFFLOAD_TIMESTAMP; if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN_V2 && From dba51a2fbdde67a2237a8d2c9fb73baf29e04dd0 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 13 Nov 2025 13:33:45 -0800 Subject: [PATCH 22/99] net/iavf: fix Rx timestamp validity check When reporting an Rx timestamp from the receive descriptor, the iavf driver does not check the validity bit in the time_stamp_low field. In the event that hardware does not capture a receive timestamp for any reason, this valid bit is unset, and the timestamp value in the descriptor is zeroed out. The iavf driver ignores this and passes the zero value into the iavf_tstamp_convert_32b_64b function regardless, and proceeds to treat the result as a valid timestamp. Instead of reporting a zero timestamp which users can clearly interpret as invalid, the raw 0 value from the descriptor is "extended" to the 64-bit timestamp. This results in values which are not immediately obvious as invalid to users: timestamp 1760629088881475583 timestamp 1760629088881475583 timestamp 1760629088881475583 First, if the value is printed in base 10 it is not immediately obvious that the lower 32 bits are zero. Second, multiple packets in sequence will receive the same "timestamp". This occurs because of the timestamp extension logic. The receive descriptor timestamps are 40 bits, with 32 bits of nanosecond precision, 7 bits of subnanosecond precision, and 1 validity bit. The sub-nanosecond precision bits are discarded. To obtain a 64-bit timestamp, the upper 32 bits are calculated from the lower 32-bits and a snapshot of the PHC timer that is captured recently (within ~2 seconds of the packet timestamp). This enables reporting proper full 64-bit timestamps without needing to store all 64 bits in the receive descriptor. However, when timestamps are not working properly, the raw 'zero' value is extended regardless of whether hardware indicated it was a valid timestamp. As a result, users can see what appear at a glance as valid timestamps. However, they will not match the packet reception time, and will only update when the upper bits would roll over. This occurs every 2^32 seconds, or approximately once every 4 seconds. Instead of reporting bogus extended timestamp values which could confuse user applications, check the validity bit and only report a timestamp of the valid bit is set. This matches the implementation used in the Linux PF driver. 
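To see why a zeroed descriptor field still yields a plausible-looking value, consider the extension step itself. The sketch below captures the assumed logic of iavf_tstamp_convert_32b_64b in simplified form: the result always lands within about two seconds of the PHC snapshot, valid input or not.

```
#include <stdint.h>

static uint64_t
tstamp_extend_32b_64b(uint64_t phc_time, uint32_t ts_lo)
{
	uint32_t phc_lo = (uint32_t)phc_time;
	uint32_t delta = ts_lo - phc_lo;

	/* Packet and snapshot are captured within ~2s of each other,
	 * so pick whichever direction keeps the result nearby.
	 */
	if (delta > UINT32_MAX / 2)
		return phc_time - (uint64_t)(phc_lo - ts_lo);
	return phc_time + delta;
}
```

With ts_lo == 0 this returns a value near the snapshot whose low 32 bits are zero, exactly the repeated bogus timestamps quoted above; the roughly 4 s repeat period corresponds to a rollover of the lower word every 2^32 nanoseconds.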
Fixes: b5cd735132f6 ("net/iavf: enable Rx timestamp on flex descriptor") Cc: stable@dpdk.org Signed-off-by: Jacob Keller Acked-by: Bruce Richardson --- drivers/net/intel/iavf/iavf_rxtx.c | 9 ++++++--- drivers/net/intel/iavf/iavf_rxtx.h | 3 +++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/net/intel/iavf/iavf_rxtx.c b/drivers/net/intel/iavf/iavf_rxtx.c index ea49059f83a..d8662fd8153 100644 --- a/drivers/net/intel/iavf/iavf_rxtx.c +++ b/drivers/net/intel/iavf/iavf_rxtx.c @@ -1582,7 +1582,8 @@ iavf_recv_pkts_flex_rxd(void *rx_queue, rxd_to_pkt_fields_ops[rxq->rxdid](rxq, rxm, &rxd); pkt_flags = iavf_flex_rxd_error_to_pkt_flags(rx_stat_err0); - if (iavf_timestamp_dynflag > 0) { + if (iavf_timestamp_dynflag > 0 && + rxd.wb.time_stamp_low & IAVF_RX_FLX_DESC_TS_VALID) { ts_ns = iavf_tstamp_convert_32b_64b(rxq->phc_time, rte_le_to_cpu_32(rxd.wb.flex_ts.ts_high)); @@ -1751,7 +1752,8 @@ iavf_recv_scattered_pkts_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts, rxd_to_pkt_fields_ops[rxq->rxdid](rxq, first_seg, &rxd); pkt_flags = iavf_flex_rxd_error_to_pkt_flags(rx_stat_err0); - if (iavf_timestamp_dynflag > 0) { + if (iavf_timestamp_dynflag > 0 && + rxd.wb.time_stamp_low & IAVF_RX_FLX_DESC_TS_VALID) { ts_ns = iavf_tstamp_convert_32b_64b(rxq->phc_time, rte_le_to_cpu_32(rxd.wb.flex_ts.ts_high)); @@ -2036,7 +2038,8 @@ iavf_rx_scan_hw_ring_flex_rxd(struct ci_rx_queue *rxq, stat_err0 = rte_le_to_cpu_16(rxdp[j].wb.status_error0); pkt_flags = iavf_flex_rxd_error_to_pkt_flags(stat_err0); - if (iavf_timestamp_dynflag > 0) { + if (iavf_timestamp_dynflag > 0 && + rxdp[j].wb.time_stamp_low & IAVF_RX_FLX_DESC_TS_VALID) { ts_ns = iavf_tstamp_convert_32b_64b(rxq->phc_time, rte_le_to_cpu_32(rxdp[j].wb.flex_ts.ts_high)); diff --git a/drivers/net/intel/iavf/iavf_rxtx.h b/drivers/net/intel/iavf/iavf_rxtx.h index 5c9339b99f9..8efb3bd04ee 100644 --- a/drivers/net/intel/iavf/iavf_rxtx.h +++ b/drivers/net/intel/iavf/iavf_rxtx.h @@ -504,6 +504,9 @@ enum iavf_tx_ctx_desc_tunnel_l4_tunnel_type { /* for iavf_32b_rx_flex_desc.pkt_len member */ #define IAVF_RX_FLX_DESC_PKT_LEN_M (0x3FFF) /* 14-bits */ +/* Valid indicator bit for the time_stamp_low field */ +#define IAVF_RX_FLX_DESC_TS_VALID (0x1UL) + int iavf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, uint16_t nb_desc, From ce19d0ad17886f9f3af5bc16b39e15b00e9a94a2 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 14 Nov 2025 10:21:31 -0800 Subject: [PATCH 23/99] net/e1000: check flex filter mask range MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Gcc-16 complains about possible reference outside of array when managing flex filter. This is a false positive because the filter length can never be that long, but compiler can't detect that. Add guard rail check to only loop over possible array. 
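Abstracted away from the driver, the guard-rail idiom is simply a clamped loop bound, shown in the hedged sketch below before the quoted gcc-16 diagnostics (the size macro is a stand-in for E1000_FLEX_FILTERS_MASK_SIZE):

```
#include <limits.h>
#include <stdint.h>

#define MASK_ARRAY_SIZE 16	/* stand-in for the real array size */

static void
fill_mask(uint8_t *dst, const uint8_t *src, unsigned int len_bytes)
{
	unsigned int n = (len_bytes + CHAR_BIT - 1) / CHAR_BIT;
	unsigned int i;

	/* Clamp to the array size so the compiler can prove the writes
	 * stay in bounds even when it cannot see the range of the
	 * filter length.
	 */
	if (n > MASK_ARRAY_SIZE)
		n = MASK_ARRAY_SIZE;
	for (i = 0; i < n; i++)
		dst[i] = src[i];
}
```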
../drivers/net/intel/e1000/igb_ethdev.c:4265:23: note: at offset 152 into destination object of size 176 allocated by ‘rte_zmalloc’ 4265 | flex_filter = rte_zmalloc("e1000_flex_filter", | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 4266 | sizeof(struct e1000_flex_filter), 0); | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ../drivers/net/intel/e1000/igb_ethdev.c:4280:50: warning: writing 64 bytes into a region of size 0 [-Wstringop-overflow=] 4280 | flex_filter->filter_info.mask[i] = mask; | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~ Signed-off-by: Stephen Hemminger Reviewed-by: Anatoly Burakov --- drivers/net/intel/e1000/igb_ethdev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/intel/e1000/igb_ethdev.c b/drivers/net/intel/e1000/igb_ethdev.c index f4e2a6442e0..1f51ba64062 100644 --- a/drivers/net/intel/e1000/igb_ethdev.c +++ b/drivers/net/intel/e1000/igb_ethdev.c @@ -4270,7 +4270,8 @@ eth_igb_add_del_flex_filter(struct rte_eth_dev *dev, flex_filter->filter_info.len = filter->len; flex_filter->filter_info.priority = filter->priority; memcpy(flex_filter->filter_info.dwords, filter->bytes, filter->len); - for (i = 0; i < RTE_ALIGN(filter->len, CHAR_BIT) / CHAR_BIT; i++) { + for (i = 0; i < RTE_ALIGN(filter->len, CHAR_BIT) / CHAR_BIT && + i < E1000_FLEX_FILTERS_MASK_SIZE; i++) { mask = 0; /* reverse bits in flex filter's mask*/ for (shift = 0; shift < CHAR_BIT; shift++) { From ee46024b5e067b45af3827d91496df83348f99a0 Mon Sep 17 00:00:00 2001 From: Satha Rao Date: Thu, 13 Nov 2025 10:07:46 +0530 Subject: [PATCH 24/99] common/cnxk: add TM tree for SDP interface Create a new default tree for the SDP interface if more than one Tx queue is requested. This helps to back pressure each queue independently when they are created with separate channels. 
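Before reading the implementation, it helps to picture the hierarchy this patch builds. Reconstructed from the node-creation sequence in the diff (PF case, with TL1 access), each Tx queue hangs off its own mid-level node carrying a distinct relative channel, which is what lets hardware pause queues independently:

```
/*
 *              ROOT (TL1)
 *                 |
 *                TL2              -- PF only
 *                 |
 *                TL3              -- shared by all queues
 *        _________|__________
 *       /         |          \
 *    TL4[0]    TL4[1] ...  TL4[N-1]   rel_chan = i % tx_chan_cnt
 *       |         |           |
 *      SMQ       SMQ         SMQ
 *       |         |           |
 *     SQ 0      SQ 1        SQ N-1    (leaf level)
 */
```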
Signed-off-by: Satha Rao --- drivers/common/cnxk/roc_nix.h | 2 + drivers/common/cnxk/roc_nix_priv.h | 2 + drivers/common/cnxk/roc_nix_tm.c | 158 ++++++++++++++++++ drivers/common/cnxk/roc_nix_tm_ops.c | 5 +- drivers/common/cnxk/roc_nix_tm_utils.c | 2 +- .../common/cnxk/roc_platform_base_symbols.c | 1 + 6 files changed, 168 insertions(+), 2 deletions(-) diff --git a/drivers/common/cnxk/roc_nix.h b/drivers/common/cnxk/roc_nix.h index a62ddf4732b..f4b92364865 100644 --- a/drivers/common/cnxk/roc_nix.h +++ b/drivers/common/cnxk/roc_nix.h @@ -689,6 +689,7 @@ enum roc_nix_tm_tree { ROC_NIX_TM_DEFAULT = 0, ROC_NIX_TM_RLIMIT, ROC_NIX_TM_PFC, + ROC_NIX_TM_SDP, ROC_NIX_TM_USER, ROC_NIX_TM_TREE_MAX, }; @@ -861,6 +862,7 @@ int __roc_api roc_nix_tm_lvl_cnt_get(struct roc_nix *roc_nix); int __roc_api roc_nix_tm_lvl_have_link_access(struct roc_nix *roc_nix, int lvl); int __roc_api roc_nix_tm_prepare_rate_limited_tree(struct roc_nix *roc_nix); int __roc_api roc_nix_tm_pfc_prepare_tree(struct roc_nix *roc_nix); +int __roc_api roc_nix_tm_sdp_prepare_tree(struct roc_nix *roc_nix); bool __roc_api roc_nix_tm_is_user_hierarchy_enabled(struct roc_nix *nix); int __roc_api roc_nix_tm_tree_type_get(struct roc_nix *nix); int __roc_api roc_nix_tm_mark_config(struct roc_nix *roc_nix, diff --git a/drivers/common/cnxk/roc_nix_priv.h b/drivers/common/cnxk/roc_nix_priv.h index c9496211968..f1fcee2acbd 100644 --- a/drivers/common/cnxk/roc_nix_priv.h +++ b/drivers/common/cnxk/roc_nix_priv.h @@ -389,6 +389,8 @@ nix_tm_tree2str(enum roc_nix_tm_tree tree) return "Rate Limit Tree"; else if (tree == ROC_NIX_TM_PFC) return "PFC Tree"; + else if (tree == ROC_NIX_TM_SDP) + return "SDP Tree"; else if (tree == ROC_NIX_TM_USER) return "User Tree"; return "???"; diff --git a/drivers/common/cnxk/roc_nix_tm.c b/drivers/common/cnxk/roc_nix_tm.c index abfe80978b4..2771fd8fc4c 100644 --- a/drivers/common/cnxk/roc_nix_tm.c +++ b/drivers/common/cnxk/roc_nix_tm.c @@ -1890,6 +1890,164 @@ roc_nix_tm_pfc_prepare_tree(struct roc_nix *roc_nix) return rc; } +int +roc_nix_tm_sdp_prepare_tree(struct roc_nix *roc_nix) +{ + struct nix *nix = roc_nix_to_nix_priv(roc_nix); + uint32_t nonleaf_id = nix->nb_tx_queues; + uint32_t tl2_node_id, tl3_node_id; + uint8_t leaf_lvl, lvl, lvl_start; + struct nix_tm_node *node = NULL; + uint32_t parent, i; + int rc = -ENOMEM; + + parent = ROC_NIX_TM_NODE_ID_INVALID; + leaf_lvl = (nix_tm_have_tl1_access(nix) ? 
ROC_TM_LVL_QUEUE : ROC_TM_LVL_SCH4); + + /* TL1 node */ + node = nix_tm_node_alloc(); + if (!node) + goto error; + + node->id = nonleaf_id; + node->parent_id = parent; + node->priority = 0; + node->weight = NIX_TM_DFLT_RR_WT; + node->shaper_profile_id = ROC_NIX_TM_SHAPER_PROFILE_NONE; + node->lvl = ROC_TM_LVL_ROOT; + node->tree = ROC_NIX_TM_SDP; + node->rel_chan = NIX_TM_CHAN_INVALID; + + rc = nix_tm_node_add(roc_nix, node); + if (rc) + goto error; + + parent = nonleaf_id; + nonleaf_id++; + + lvl_start = ROC_TM_LVL_SCH1; + if (roc_nix_is_pf(roc_nix)) { + /* TL2 node */ + rc = -ENOMEM; + node = nix_tm_node_alloc(); + if (!node) + goto error; + + node->id = nonleaf_id; + node->parent_id = parent; + node->priority = 0; + node->weight = NIX_TM_DFLT_RR_WT; + node->shaper_profile_id = ROC_NIX_TM_SHAPER_PROFILE_NONE; + node->lvl = ROC_TM_LVL_SCH1; + node->tree = ROC_NIX_TM_SDP; + node->rel_chan = NIX_TM_CHAN_INVALID; + + rc = nix_tm_node_add(roc_nix, node); + if (rc) + goto error; + + lvl_start = ROC_TM_LVL_SCH2; + tl2_node_id = nonleaf_id; + nonleaf_id++; + } else { + tl2_node_id = parent; + } + + /* Allocate TL3 node */ + rc = -ENOMEM; + node = nix_tm_node_alloc(); + if (!node) + goto error; + + node->id = nonleaf_id; + node->parent_id = tl2_node_id; + node->priority = 0; + node->weight = NIX_TM_DFLT_RR_WT; + node->shaper_profile_id = ROC_NIX_TM_SHAPER_PROFILE_NONE; + node->lvl = lvl_start; + node->tree = ROC_NIX_TM_SDP; + node->rel_chan = NIX_TM_CHAN_INVALID; + + rc = nix_tm_node_add(roc_nix, node); + if (rc) + goto error; + + tl3_node_id = nonleaf_id; + nonleaf_id++; + lvl_start++; + + for (i = 0; i < nix->nb_tx_queues; i++) { + parent = tl3_node_id; + rc = -ENOMEM; + node = nix_tm_node_alloc(); + if (!node) + goto error; + + node->id = nonleaf_id; + node->parent_id = parent; + node->priority = 0; + node->weight = NIX_TM_DFLT_RR_WT; + node->shaper_profile_id = ROC_NIX_TM_SHAPER_PROFILE_NONE; + node->lvl = lvl_start; + node->tree = ROC_NIX_TM_SDP; + /* For SDP, if BP enabled use channel to PAUSE the corresponding queue */ + node->rel_chan = (i % nix->tx_chan_cnt); + + rc = nix_tm_node_add(roc_nix, node); + if (rc) + goto error; + + parent = nonleaf_id; + nonleaf_id++; + + lvl = (nix_tm_have_tl1_access(nix) ? 
ROC_TM_LVL_SCH4 : ROC_TM_LVL_SCH3); + + rc = -ENOMEM; + node = nix_tm_node_alloc(); + if (!node) + goto error; + + node->id = nonleaf_id; + node->parent_id = parent; + node->priority = 0; + node->weight = NIX_TM_DFLT_RR_WT; + node->shaper_profile_id = ROC_NIX_TM_SHAPER_PROFILE_NONE; + node->lvl = lvl; + node->tree = ROC_NIX_TM_SDP; + node->rel_chan = NIX_TM_CHAN_INVALID; + + rc = nix_tm_node_add(roc_nix, node); + if (rc) + goto error; + + parent = nonleaf_id; + nonleaf_id++; + + rc = -ENOMEM; + node = nix_tm_node_alloc(); + if (!node) + goto error; + + node->id = i; + node->parent_id = parent; + node->priority = 0; + node->weight = NIX_TM_DFLT_RR_WT; + node->shaper_profile_id = ROC_NIX_TM_SHAPER_PROFILE_NONE; + node->lvl = leaf_lvl; + node->tree = ROC_NIX_TM_SDP; + node->rel_chan = NIX_TM_CHAN_INVALID; + + rc = nix_tm_node_add(roc_nix, node); + if (rc) + goto error; + } + + return 0; +error: + nix_tm_node_free(node); + return rc; +} + int nix_tm_free_resources(struct roc_nix *roc_nix, uint32_t tree_mask, bool hw_only) { diff --git a/drivers/common/cnxk/roc_nix_tm_ops.c b/drivers/common/cnxk/roc_nix_tm_ops.c index b89f08ac660..951c310a560 100644 --- a/drivers/common/cnxk/roc_nix_tm_ops.c +++ b/drivers/common/cnxk/roc_nix_tm_ops.c @@ -1035,7 +1035,10 @@ roc_nix_tm_init(struct roc_nix *roc_nix) } /* Prepare default tree */ - rc = nix_tm_prepare_default_tree(roc_nix); + if (roc_nix_is_sdp(roc_nix) && (nix->nb_tx_queues > 1)) + rc = roc_nix_tm_sdp_prepare_tree(roc_nix); + else + rc = nix_tm_prepare_default_tree(roc_nix); if (rc) { plt_err("failed to prepare default tm tree, rc=%d", rc); return rc; diff --git a/drivers/common/cnxk/roc_nix_tm_utils.c b/drivers/common/cnxk/roc_nix_tm_utils.c index 4a09cc2aae5..eaf6f9e4c78 100644 --- a/drivers/common/cnxk/roc_nix_tm_utils.c +++ b/drivers/common/cnxk/roc_nix_tm_utils.c @@ -582,7 +582,7 @@ nix_tm_topology_reg_prep(struct nix *nix, struct nix_tm_node *node, /* Configure TL4 to send to SDP channel instead of CGX/LBK */ if (nix->sdp_link) { - relchan = nix->tx_chan_base & 0xff; + relchan = (nix->tx_chan_base & 0xff) + node->rel_chan; plt_tm_dbg("relchan=%u schq=%u tx_chan_cnt=%u", relchan, schq, nix->tx_chan_cnt); reg[k] = NIX_AF_TL4X_SDP_LINK_CFG(schq); diff --git a/drivers/common/cnxk/roc_platform_base_symbols.c b/drivers/common/cnxk/roc_platform_base_symbols.c index ff64e829149..40d5cd290bb 100644 --- a/drivers/common/cnxk/roc_platform_base_symbols.c +++ b/drivers/common/cnxk/roc_platform_base_symbols.c @@ -223,6 +223,7 @@ RTE_EXPORT_INTERNAL_SYMBOL(roc_nix_rq_dump) RTE_EXPORT_INTERNAL_SYMBOL(roc_nix_cq_dump) RTE_EXPORT_INTERNAL_SYMBOL(roc_nix_sq_dump) RTE_EXPORT_INTERNAL_SYMBOL(roc_nix_tm_dump) +RTE_EXPORT_INTERNAL_SYMBOL(roc_nix_tm_sdp_prepare_tree) RTE_EXPORT_INTERNAL_SYMBOL(roc_nix_dump) RTE_EXPORT_INTERNAL_SYMBOL(roc_nix_inl_dev_dump) RTE_EXPORT_INTERNAL_SYMBOL(roc_nix_inl_outb_cpt_lfs_dump) From 33800b56d65fb5db8aadcb4a38982bb1e9ecb8ec Mon Sep 17 00:00:00 2001 From: Satha Rao Date: Thu, 13 Nov 2025 10:07:47 +0530 Subject: [PATCH 25/99] net/cnxk: add TM tree for SDP interface Create a new default tree for the SDP interface if more than one Tx queue is requested. This helps to back pressure each queue independently when they are created with separate channels. 
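From the application side nothing new is required to opt in; requesting more than one Tx queue on an SDP port is what triggers the tree selection in the diff below. A hedged example (queue counts are arbitrary):

```
#include <string.h>
#include <rte_ethdev.h>

static int
configure_sdp_port(uint16_t port_id)
{
	struct rte_eth_conf conf;

	memset(&conf, 0, sizeof(conf));
	/* nb_txq > 1 makes the cnxk PMD pick ROC_NIX_TM_SDP internally */
	return rte_eth_dev_configure(port_id, 4 /* rxq */, 4 /* txq */,
				     &conf);
}
```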
Signed-off-by: Satha Rao --- drivers/net/cnxk/cnxk_ethdev.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/cnxk/cnxk_ethdev.c b/drivers/net/cnxk/cnxk_ethdev.c index 7c1aa34de23..69830a12808 100644 --- a/drivers/net/cnxk/cnxk_ethdev.c +++ b/drivers/net/cnxk/cnxk_ethdev.c @@ -1539,7 +1539,10 @@ cnxk_nix_configure(struct rte_eth_dev *eth_dev) goto free_nix_lf; } - rc = roc_nix_tm_hierarchy_enable(nix, ROC_NIX_TM_DEFAULT, false); + if (roc_nix_is_sdp(&dev->nix) && nb_txq > 1) + rc = roc_nix_tm_hierarchy_enable(nix, ROC_NIX_TM_SDP, false); + else + rc = roc_nix_tm_hierarchy_enable(nix, ROC_NIX_TM_DEFAULT, false); if (rc) { plt_err("Failed to enable default tm hierarchy, rc=%d", rc); goto tm_fini; From b5b5d166d6eed7bd6af1a2065d0e378a7769689f Mon Sep 17 00:00:00 2001 From: Satha Rao Date: Thu, 13 Nov 2025 10:07:48 +0530 Subject: [PATCH 26/99] net/cnxk: disable CQ when SQ stopped Drain all CQ buffers and close CQ when SQ enabled completion is about to stop. Signed-off-by: Satha Rao --- drivers/net/cnxk/cn10k_ethdev.c | 16 ++++++++-------- drivers/net/cnxk/cn20k_ethdev.c | 11 ++++++----- drivers/net/cnxk/cnxk_ethdev.c | 11 ++++++++++- 3 files changed, 24 insertions(+), 14 deletions(-) diff --git a/drivers/net/cnxk/cn10k_ethdev.c b/drivers/net/cnxk/cn10k_ethdev.c index 9c1621dbfa5..23a2341c8b4 100644 --- a/drivers/net/cnxk/cn10k_ethdev.c +++ b/drivers/net/cnxk/cn10k_ethdev.c @@ -198,21 +198,21 @@ cn10k_nix_tx_compl_setup(struct cnxk_eth_dev *dev, static void cn10k_nix_tx_queue_release(struct rte_eth_dev *eth_dev, uint16_t qid) { + struct cn10k_eth_txq *txq = eth_dev->data->tx_queues[qid]; struct cnxk_eth_dev *dev = cnxk_eth_pmd_priv(eth_dev); struct roc_nix *nix = &dev->nix; - struct cn10k_eth_txq *txq; - cnxk_nix_tx_queue_release(eth_dev, qid); - txq = eth_dev->data->tx_queues[qid]; - - if (nix->tx_compl_ena) + if (nix->tx_compl_ena) { + /* First process all CQ entries */ + handle_tx_completion_pkts(txq, 0); plt_free(txq->tx_compl.ptr); + } + cnxk_nix_tx_queue_release(eth_dev, qid); } static int -cn10k_nix_tx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid, - uint16_t nb_desc, unsigned int socket, - const struct rte_eth_txconf *tx_conf) +cn10k_nix_tx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid, uint16_t nb_desc, + unsigned int socket, const struct rte_eth_txconf *tx_conf) { struct cnxk_eth_dev *dev = cnxk_eth_pmd_priv(eth_dev); struct roc_nix *nix = &dev->nix; diff --git a/drivers/net/cnxk/cn20k_ethdev.c b/drivers/net/cnxk/cn20k_ethdev.c index 376e334588f..a7ef1dd3868 100644 --- a/drivers/net/cnxk/cn20k_ethdev.c +++ b/drivers/net/cnxk/cn20k_ethdev.c @@ -192,15 +192,16 @@ cn20k_nix_tx_compl_setup(struct cnxk_eth_dev *dev, struct cn20k_eth_txq *txq, st static void cn20k_nix_tx_queue_release(struct rte_eth_dev *eth_dev, uint16_t qid) { + struct cn20k_eth_txq *txq = eth_dev->data->tx_queues[qid]; struct cnxk_eth_dev *dev = cnxk_eth_pmd_priv(eth_dev); struct roc_nix *nix = &dev->nix; - struct cn20k_eth_txq *txq; - - cnxk_nix_tx_queue_release(eth_dev, qid); - txq = eth_dev->data->tx_queues[qid]; - if (nix->tx_compl_ena) + if (nix->tx_compl_ena) { + /* First process all CQ entries */ + handle_tx_completion_pkts(txq, 0); plt_free(txq->tx_compl.ptr); + } + cnxk_nix_tx_queue_release(eth_dev, qid); } static int diff --git a/drivers/net/cnxk/cnxk_ethdev.c b/drivers/net/cnxk/cnxk_ethdev.c index 69830a12808..ff78622b584 100644 --- a/drivers/net/cnxk/cnxk_ethdev.c +++ b/drivers/net/cnxk/cnxk_ethdev.c @@ -658,6 +658,7 @@ cnxk_nix_tx_queue_release(struct 
rte_eth_dev *eth_dev, uint16_t qid) struct cnxk_eth_txq_sp *txq_sp; struct cnxk_eth_dev *dev; struct roc_nix_sq *sq; + struct roc_nix_cq *cq; int rc; if (!txq) @@ -666,11 +667,19 @@ cnxk_nix_tx_queue_release(struct rte_eth_dev *eth_dev, uint16_t qid) txq_sp = cnxk_eth_txq_to_sp(txq); dev = txq_sp->dev; + sq = &dev->sqs[qid]; plt_nix_dbg("Releasing txq %u", qid); + if (dev->nix.tx_compl_ena) { + /* Cleanup ROC CQ */ + cq = &dev->cqs[sq->cqid]; + rc = roc_nix_cq_fini(cq); + if (rc) + plt_err("Failed to cleanup cq, rc=%d", rc); + } + /* Cleanup ROC SQ */ - sq = &dev->sqs[qid]; rc = roc_nix_sq_fini(sq); if (rc) plt_err("Failed to cleanup sq, rc=%d", rc); From d9a6291f5a6442e6cfb215c7258ede755d4ebbaa Mon Sep 17 00:00:00 2001 From: Nithin Dabilpuram Date: Thu, 13 Nov 2025 10:07:49 +0530 Subject: [PATCH 27/99] net/cnxk: update scatter check as warning for SDP Update scatter check as warning for SDP interfaces instead of error to support cases where host application is already aware for the max buf size. Signed-off-by: Nithin Dabilpuram --- drivers/net/cnxk/cnxk_ethdev_ops.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/cnxk/cnxk_ethdev_ops.c b/drivers/net/cnxk/cnxk_ethdev_ops.c index 7a8ddb4c9b9..49e77e49a67 100644 --- a/drivers/net/cnxk/cnxk_ethdev_ops.c +++ b/drivers/net/cnxk/cnxk_ethdev_ops.c @@ -613,8 +613,11 @@ cnxk_nix_mtu_set(struct rte_eth_dev *eth_dev, uint16_t mtu) */ if (data->dev_started && frame_size > buffsz && !(dev->rx_offloads & RTE_ETH_RX_OFFLOAD_SCATTER)) { - plt_err("Scatter offload is not enabled for mtu"); - goto exit; + if (!roc_nix_is_sdp(nix)) { + plt_err("Scatter offload is not enabled for mtu"); + goto exit; + } + plt_warn("Scatter offload is not enabled for mtu on SDP interface"); } /* Check * >= max_frame */ From 5a753913e06ab335d147ddb631d7bd3e15534d62 Mon Sep 17 00:00:00 2001 From: Monendra Singh Kushwaha Date: Thu, 13 Nov 2025 10:07:50 +0530 Subject: [PATCH 28/99] common/cnxk: fix inline device write operation This patch fixes the inline device functions to work when roc_nix is NULL. Fixes: f81ee7133b48 ("common/cnxk: support inline SA context invalidate") Cc: stable@dpdk.org Signed-off-by: Monendra Singh Kushwaha --- drivers/common/cnxk/roc_nix_inl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/common/cnxk/roc_nix_inl.c b/drivers/common/cnxk/roc_nix_inl.c index 70ab8001e18..6700f556a0e 100644 --- a/drivers/common/cnxk/roc_nix_inl.c +++ b/drivers/common/cnxk/roc_nix_inl.c @@ -2324,7 +2324,7 @@ roc_nix_inl_ctx_write(struct roc_nix *roc_nix, void *sa_dptr, void *sa_cptr, if (outb_lf == NULL) goto exit; - if (roc_model_is_cn10k() || roc_nix->use_write_sa) { + if (roc_model_is_cn10k() || (roc_nix && roc_nix->use_write_sa)) { rbase = outb_lf->rbase; flush.u = 0; From c054608c92e37c0b5c6cc90a6c3878a72738c706 Mon Sep 17 00:00:00 2001 From: Satha Rao Date: Thu, 13 Nov 2025 10:07:51 +0530 Subject: [PATCH 29/99] common/cnxk: add mailbox to configure LSO alt flags LSO enhanced to support flags modification. Added new mbox to enable this feature. 
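The semantics of an alt-flags profile, as inferred from the set/mask pairs used later in this series: for each LSO segment class (first/middle/last), bits kept by the mask pass through from the original header, and the remaining bits are forced to the profile's set value. In miniature:

```
#include <stdint.h>

/* Rewrite a 16-bit header field for one LSO segment class. */
static uint16_t
apply_alt_flags(uint16_t field, uint16_t set, uint16_t mask)
{
	return (field & mask) | set;
}
```

With the IPv4 values used in the next patch (set = 0x2000, i.e. the MF bit), first and middle fragments get MF forced on and the last fragment gets it cleared, while the mask decides which original bits, such as DF, survive.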
Signed-off-by: Satha Rao --- drivers/common/cnxk/hw/nix.h | 46 ++++++++++++++++++++++++++++-------- 1 file changed, 36 insertions(+), 10 deletions(-) diff --git a/drivers/common/cnxk/hw/nix.h b/drivers/common/cnxk/hw/nix.h index c438f18145f..3aed2c7d54c 100644 --- a/drivers/common/cnxk/hw/nix.h +++ b/drivers/common/cnxk/hw/nix.h @@ -2508,18 +2508,44 @@ struct nix_lso_format { uint64_t sizem1 : 2; uint64_t rsvd_14_15 : 2; uint64_t alg : 3; - uint64_t rsvd_19_63 : 45; + uint64_t alt_flags : 1; + uint64_t alt_flags_index : 2; + uint64_t shift : 3; + uint64_t rsvd_25_63 : 39; }; -#define NIX_LSO_FIELD_MAX (8) -#define NIX_LSO_FIELD_ALG_MASK GENMASK(18, 16) -#define NIX_LSO_FIELD_SZ_MASK GENMASK(13, 12) -#define NIX_LSO_FIELD_LY_MASK GENMASK(9, 8) -#define NIX_LSO_FIELD_OFF_MASK GENMASK(7, 0) - -#define NIX_LSO_FIELD_MASK \ - (NIX_LSO_FIELD_OFF_MASK | NIX_LSO_FIELD_LY_MASK | \ - NIX_LSO_FIELD_SZ_MASK | NIX_LSO_FIELD_ALG_MASK) +/* NIX LSO ALT_FLAGS field structure */ +typedef union nix_lso_alt_flg_format { + uint64_t u[2]; + + struct nix_lso_alt_flg_cfg { + /* NIX_AF_LSO_ALT_FLAGS_CFG */ + uint64_t alt_msf_set : 16; + uint64_t alt_msf_mask : 16; + uint64_t alt_fsf_set : 16; + uint64_t alt_fsf_mask : 16; + + /* NIX_AF_LSO_ALT_FLAGS_CFG1 */ + uint64_t alt_lsf_set : 16; + uint64_t alt_lsf_mask : 16; + uint64_t alt_ssf_set : 16; + uint64_t alt_ssf_mask : 16; + } s; +} nix_lso_alt_flg_format_t; + +#define NIX_LSO_FIELD_MAX (8) +#define NIX_LSO_FIELD_SHIFT_MASK GENMASK(24, 22) +#define NIX_LSO_FIELD_ALT_FLG_IDX_MASK GENMASK(21, 20) +#define NIX_LSO_FIELD_ALT_FLG_MASK BIT_ULL(19) +#define NIX_LSO_FIELD_ALG_MASK GENMASK(18, 16) +#define NIX_LSO_FIELD_SZ_MASK GENMASK(13, 12) +#define NIX_LSO_FIELD_LY_MASK GENMASK(9, 8) +#define NIX_LSO_FIELD_OFF_MASK GENMASK(7, 0) + +#define NIX_LSO_FIELD_MASK \ + (NIX_LSO_FIELD_OFF_MASK | NIX_LSO_FIELD_LY_MASK | NIX_LSO_FIELD_SZ_MASK | \ + NIX_LSO_FIELD_ALG_MASK | NIX_LSO_FIELD_ALT_FLG_MASK | NIX_LSO_FIELD_ALT_FLG_IDX_MASK | \ + NIX_LSO_FIELD_SHIFT_MASK) #define NIX_CN9K_MAX_HW_FRS 9212UL #define NIX_LBK_MAX_HW_FRS 65535UL From 52123cc744c942b282e47320312a4035e31e2f4a Mon Sep 17 00:00:00 2001 From: Satha Rao Date: Thu, 13 Nov 2025 10:07:52 +0530 Subject: [PATCH 30/99] common/cnxk: add IPv4 fragmentation offload Extend LSO offload to support IPv4 fragmentation. 
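A worked example of what the format below arranges in hardware (assumed semantics): with an MPS of 1480 payload bytes, fragment i must carry a fragment offset of i * 1480 / 8 = i * 185 units, plus MF on every fragment but the last. The shift value of 3 in the offset field is what performs the divide-by-8 on the accumulated payload length.

```
#include <stdbool.h>
#include <stdint.h>

/* Per-fragment IPv4 header fields, mirroring the hardware behaviour.
 * 'mps' must be a multiple of 8 for the offsets to be valid.
 */
static void
ipv4_frag_fields(uint16_t mps, uint32_t seg, uint32_t nb_segs,
		 uint16_t *frag_off_units, bool *more_frags)
{
	*frag_off_units = (uint16_t)((seg * (uint32_t)mps) >> 3); /* shift 3 */
	*more_frags = (seg + 1 < nb_segs);                        /* MF bit */
}
```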
Signed-off-by: Satha Rao --- drivers/common/cnxk/hw/nix.h | 2 +- drivers/common/cnxk/roc_nix.h | 8 ++ drivers/common/cnxk/roc_nix_ops.c | 113 +++++++++++++++++- drivers/common/cnxk/roc_nix_priv.h | 1 + .../common/cnxk/roc_platform_base_symbols.c | 2 + 5 files changed, 124 insertions(+), 2 deletions(-) diff --git a/drivers/common/cnxk/hw/nix.h b/drivers/common/cnxk/hw/nix.h index 3aed2c7d54c..3b1ea026115 100644 --- a/drivers/common/cnxk/hw/nix.h +++ b/drivers/common/cnxk/hw/nix.h @@ -2692,7 +2692,7 @@ typedef union nix_lso_alt_flg_format { #define NIX_LSO_SEG_MAX 256 #define NIX_LSO_MPS_MAX (BIT_ULL(14) - 1) -/* Software defined LSO base format IDX */ +/* Kernel defined LSO base format IDX */ #define NIX_LSO_FORMAT_IDX_TSOV4 0 #define NIX_LSO_FORMAT_IDX_TSOV6 1 diff --git a/drivers/common/cnxk/roc_nix.h b/drivers/common/cnxk/roc_nix.h index f4b92364865..4f8118d4e4e 100644 --- a/drivers/common/cnxk/roc_nix.h +++ b/drivers/common/cnxk/roc_nix.h @@ -29,6 +29,11 @@ #define ROC_NIX_INTF_TYPE_CPT_NIX 254 #define ROC_NIX_INTF_TYPE_SSO 253 +/* Software defined LSO base format IDX */ +#define ROC_NIX_LSO_FORMAT_IDX_TSOV4 0 +#define ROC_NIX_LSO_FORMAT_IDX_TSOV6 1 +#define ROC_NIX_LSO_FORMAT_IDX_IPV4 2 + enum roc_nix_rss_reta_sz { ROC_NIX_RSS_RETA_SZ_64 = 64, ROC_NIX_RSS_RETA_SZ_128 = 128, @@ -972,9 +977,12 @@ int __roc_api roc_nix_lso_fmt_setup(struct roc_nix *roc_nix); int __roc_api roc_nix_lso_fmt_get(struct roc_nix *roc_nix, uint8_t udp_tun[ROC_NIX_LSO_TUN_MAX], uint8_t tun[ROC_NIX_LSO_TUN_MAX]); +int __roc_api roc_nix_lso_fmt_ipv4_frag_get(struct roc_nix *roc_nix); int __roc_api roc_nix_lso_custom_fmt_setup(struct roc_nix *roc_nix, struct nix_lso_format *fields, uint16_t nb_fields); +int __roc_api roc_nix_lso_alt_flags_profile_setup(struct roc_nix *roc_nix, + nix_lso_alt_flg_format_t *fmt); int __roc_api roc_nix_eeprom_info_get(struct roc_nix *roc_nix, struct roc_nix_eeprom_info *info); diff --git a/drivers/common/cnxk/roc_nix_ops.c b/drivers/common/cnxk/roc_nix_ops.c index efb0a41d072..138090317a0 100644 --- a/drivers/common/cnxk/roc_nix_ops.c +++ b/drivers/common/cnxk/roc_nix_ops.c @@ -5,6 +5,8 @@ #include "roc_api.h" #include "roc_priv.h" +#define NIX_LSO_FRMT_IPV4_OFFSET_SHFT 3 + static void nix_lso_tcp(struct nix_lso_format_cfg *req, bool v4) { @@ -159,6 +161,34 @@ nix_lso_tun_tcp(struct nix_lso_format_cfg *req, bool outer_v4, bool inner_v4) field++; } +int +roc_nix_lso_alt_flags_profile_setup(struct roc_nix *roc_nix, nix_lso_alt_flg_format_t *fmt) +{ + struct nix *nix = roc_nix_to_nix_priv(roc_nix); + struct dev *dev = &nix->dev; + struct mbox *mbox = mbox_get(dev->mbox); + struct nix_lso_alt_flags_cfg_rsp *rsp; + struct nix_lso_alt_flags_cfg_req *req; + int rc = -ENOSPC; + + req = mbox_alloc_msg_nix_lso_alt_flags_cfg(mbox); + if (req == NULL) + goto exit; + + req->cfg = fmt->u[0]; + req->cfg1 = fmt->u[1]; + + rc = mbox_process_msg(mbox, (void *)&rsp); + if (rc) + goto exit; + + plt_nix_dbg("Setup alt flags format %u", rsp->lso_alt_flags_idx); + rc = rsp->lso_alt_flags_idx; +exit: + mbox_put(mbox); + return rc; +} + int roc_nix_lso_custom_fmt_setup(struct roc_nix *roc_nix, struct nix_lso_format *fields, uint16_t nb_fields) @@ -194,6 +224,74 @@ roc_nix_lso_custom_fmt_setup(struct roc_nix *roc_nix, return rc; } +static int +nix_lso_ipv4(struct roc_nix *roc_nix) +{ + struct nix *nix = roc_nix_to_nix_priv(roc_nix); + struct nix_lso_format_cfg_rsp *rsp; + nix_lso_alt_flg_format_t alt_flags; + + __io struct nix_lso_format *field; + struct nix_lso_format_cfg *req; + int flag_idx = 0, rc = -ENOSPC; 
+ struct dev *dev = &nix->dev; + struct mbox *mbox; + + /* First get flags profile to update v4 flags */ + memset(&alt_flags, 0, sizeof(alt_flags)); + alt_flags.s.alt_fsf_set = 0x2000; + alt_flags.s.alt_fsf_mask = 0x1FFF; + alt_flags.s.alt_msf_set = 0x2000; + alt_flags.s.alt_msf_mask = 0x1FFF; + alt_flags.s.alt_lsf_set = 0x0000; + alt_flags.s.alt_lsf_mask = 0x1FFF; + flag_idx = roc_nix_lso_alt_flags_profile_setup(roc_nix, &alt_flags); + if (flag_idx < 0) + return rc; + + mbox = mbox_get(dev->mbox); + + /* + * IPv4 Fragmentation + */ + req = mbox_alloc_msg_nix_lso_format_cfg(mbox); + if (req == NULL) { + rc = -ENOSPC; + goto exit; + } + + /* Format works only with TCP packet marked by OL3/OL4 */ + field = (__io struct nix_lso_format *)&req->fields[0]; + req->field_mask = NIX_LSO_FIELD_MASK; + /* Update Payload Length */ + field->layer = NIX_TXLAYER_OL3; + field->offset = 2; + field->sizem1 = 1; /* 2B */ + field->alg = NIX_LSOALG_ADD_PAYLEN; + field++; + + /* Update fragment offset and flags */ + field->layer = NIX_TXLAYER_OL3; + field->offset = 6; + field->sizem1 = 1; + field->shift = NIX_LSO_FRMT_IPV4_OFFSET_SHFT; + field->alt_flags_index = flag_idx; + field->alt_flags = 1; + /* Cumulative length of previous segments */ + field->alg = NIX_LSOALG_ADD_OFFSET; + field++; + rc = mbox_process_msg(mbox, (void *)&rsp); + if (rc) + goto exit; + + /* IPv4 fragment offset shifted by 3 bits, store this value in profile ID */ + nix->lso_ipv4_idx = (NIX_LSO_FRMT_IPV4_OFFSET_SHFT << 8) | (rsp->lso_format_idx & 0x1F); + plt_nix_dbg("ipv4 fmt=%u", rsp->lso_format_idx); +exit: + mbox_put(mbox); + return rc; +} + int roc_nix_lso_fmt_setup(struct roc_nix *roc_nix) { @@ -370,12 +468,25 @@ roc_nix_lso_fmt_setup(struct roc_nix *roc_nix) nix->lso_tun_idx[ROC_NIX_LSO_TUN_V6V6] = rsp->lso_format_idx; plt_nix_dbg("tun v6v6 fmt=%u", rsp->lso_format_idx); - rc = 0; + exit: mbox_put(mbox); + + nix->lso_ipv4_idx = 0; /* IPv4 fragmentation not supported */ + if (!rc && roc_model_is_cn20k()) + return nix_lso_ipv4(roc_nix); + return rc; } +int +roc_nix_lso_fmt_ipv4_frag_get(struct roc_nix *roc_nix) +{ + struct nix *nix = roc_nix_to_nix_priv(roc_nix); + + return nix->lso_ipv4_idx; +} + int roc_nix_lso_fmt_get(struct roc_nix *roc_nix, uint8_t udp_tun[ROC_NIX_LSO_TUN_MAX], diff --git a/drivers/common/cnxk/roc_nix_priv.h b/drivers/common/cnxk/roc_nix_priv.h index f1fcee2acbd..ae1a8fd7e09 100644 --- a/drivers/common/cnxk/roc_nix_priv.h +++ b/drivers/common/cnxk/roc_nix_priv.h @@ -154,6 +154,7 @@ struct nix { uint8_t lso_tsov4_idx; uint8_t lso_udp_tun_idx[ROC_NIX_LSO_TUN_MAX]; uint8_t lso_tun_idx[ROC_NIX_LSO_TUN_MAX]; + uint16_t lso_ipv4_idx; uint8_t lf_rx_stats; uint8_t lf_tx_stats; uint8_t rx_chan_cnt; diff --git a/drivers/common/cnxk/roc_platform_base_symbols.c b/drivers/common/cnxk/roc_platform_base_symbols.c index 40d5cd290bb..cbb3fec5623 100644 --- a/drivers/common/cnxk/roc_platform_base_symbols.c +++ b/drivers/common/cnxk/roc_platform_base_symbols.c @@ -327,9 +327,11 @@ RTE_EXPORT_INTERNAL_SYMBOL(roc_nix_npc_mac_addr_set) RTE_EXPORT_INTERNAL_SYMBOL(roc_nix_npc_mac_addr_get) RTE_EXPORT_INTERNAL_SYMBOL(roc_nix_npc_rx_ena_dis) RTE_EXPORT_INTERNAL_SYMBOL(roc_nix_npc_mcast_config) +RTE_EXPORT_INTERNAL_SYMBOL(roc_nix_lso_alt_flags_profile_setup) RTE_EXPORT_INTERNAL_SYMBOL(roc_nix_lso_custom_fmt_setup) RTE_EXPORT_INTERNAL_SYMBOL(roc_nix_lso_fmt_setup) RTE_EXPORT_INTERNAL_SYMBOL(roc_nix_lso_fmt_get) +RTE_EXPORT_INTERNAL_SYMBOL(roc_nix_lso_fmt_ipv4_frag_get) RTE_EXPORT_INTERNAL_SYMBOL(roc_nix_switch_hdr_set) 
RTE_EXPORT_INTERNAL_SYMBOL(roc_nix_eeprom_info_get) RTE_EXPORT_INTERNAL_SYMBOL(roc_nix_rx_drop_re_set) From 6565191cfc370a54e824b6d1018b2c222ce67e1e Mon Sep 17 00:00:00 2001 From: Satha Rao Date: Thu, 13 Nov 2025 10:07:53 +0530 Subject: [PATCH 31/99] common/cnxk: update DF flag in IPv4 fragments While performing IPv4 fragmentation, consider the DF flag from the original packet header instead of setting it to zero. Signed-off-by: Satha Rao --- drivers/common/cnxk/roc_nix_ops.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/common/cnxk/roc_nix_ops.c b/drivers/common/cnxk/roc_nix_ops.c index 138090317a0..12a12c6e357 100644 --- a/drivers/common/cnxk/roc_nix_ops.c +++ b/drivers/common/cnxk/roc_nix_ops.c @@ -240,11 +240,11 @@ nix_lso_ipv4(struct roc_nix *roc_nix) /* First get flags profile to update v4 flags */ memset(&alt_flags, 0, sizeof(alt_flags)); alt_flags.s.alt_fsf_set = 0x2000; - alt_flags.s.alt_fsf_mask = 0x1FFF; + alt_flags.s.alt_fsf_mask = 0x5FFF; alt_flags.s.alt_msf_set = 0x2000; - alt_flags.s.alt_msf_mask = 0x1FFF; + alt_flags.s.alt_msf_mask = 0x5FFF; alt_flags.s.alt_lsf_set = 0x0000; - alt_flags.s.alt_lsf_mask = 0x1FFF; + alt_flags.s.alt_lsf_mask = 0x5FFF; flag_idx = roc_nix_lso_alt_flags_profile_setup(roc_nix, &alt_flags); if (flag_idx < 0) return rc; From 141e9a8a42bc5c802a0b3d252e51f784602c4883 Mon Sep 17 00:00:00 2001 From: Satha Rao Date: Thu, 13 Nov 2025 10:07:54 +0530 Subject: [PATCH 32/99] common/cnxk: support per-packet SQ count update SQ context extended with new feature, if enabled the counter is updated when a packet if processed, whether it is transmitted or dropped. Signed-off-by: Satha Rao --- drivers/common/cnxk/hw/nix.h | 47 +++++++------ drivers/common/cnxk/roc_features.h | 7 ++ drivers/common/cnxk/roc_nix.h | 3 + drivers/common/cnxk/roc_nix_queue.c | 70 ++++++++++++++++++- drivers/common/cnxk/roc_nix_tm.c | 2 +- drivers/common/cnxk/roc_nix_tm_ops.c | 8 ++- drivers/common/cnxk/roc_platform.h | 6 ++ .../common/cnxk/roc_platform_base_symbols.c | 1 + 8 files changed, 119 insertions(+), 25 deletions(-) diff --git a/drivers/common/cnxk/hw/nix.h b/drivers/common/cnxk/hw/nix.h index 3b1ea026115..a2a372a56af 100644 --- a/drivers/common/cnxk/hw/nix.h +++ b/drivers/common/cnxk/hw/nix.h @@ -2092,21 +2092,25 @@ struct nix_cn20k_sq_ctx_hw_s { uint64_t default_chan : 12; uint64_t sdp_mcast : 1; uint64_t sso_ena : 1; - uint64_t dse_rsvd1 : 28; + uint64_t dse_rsvd1 : 10; + uint64_t update_sq_count : 2; + uint64_t seb_count : 16; uint64_t sqb_enqueue_count : 16; /* W4 */ uint64_t tail_offset : 6; uint64_t lmt_dis : 1; uint64_t smq_rr_weight : 14; - uint64_t dnq_rsvd1 : 27; + uint64_t dnq_rsvd1 : 4; + uint64_t sq_count_iova_lo : 23; uint64_t tail_sqb : 64; /* W5 */ uint64_t next_sqb : 64; /* W6 */ - uint64_t smq : 11; /* W7 */ + uint64_t smq : 11; /* W7 */ uint64_t smq_pend : 1; uint64_t smq_next_sq : 20; uint64_t smq_next_sq_vld : 1; uint64_t mnq_dis : 1; - uint64_t scm1_rsvd2 : 30; - uint64_t smenq_sqb : 64; /* W8 */ + uint64_t scm1_rsvd2 : 7; + uint64_t sq_count_iova_hi : 23; + uint64_t smenq_sqb : 64; /* W8 */ uint64_t smenq_offset : 6; /* W9 */ uint64_t cq_limit : 8; uint64_t smq_rr_count : 32; @@ -2122,7 +2126,7 @@ struct nix_cn20k_sq_ctx_hw_s { uint64_t smenq_next_sqb_vld : 1; uint64_t scm_dq_rsvd1 : 9; uint64_t smenq_next_sqb : 64; /* W11 */ - uint64_t age_drop_octs : 32; /* W12 */ + uint64_t age_drop_octs : 32; /* W12 */ uint64_t age_drop_pkts : 32; uint64_t drop_pkts : 48; /* W13 */ uint64_t drop_octs_lsw : 16; @@ -2160,19 +2164,20 @@ 
struct nix_cn20k_sq_ctx_s { uint64_t lmt_dis : 1; uint64_t mnq_dis : 1; uint64_t smq_next_sq : 20; - uint64_t smq_lso_segnum : 8; - uint64_t tail_offset : 6; - uint64_t smenq_offset : 6; - uint64_t head_offset : 6; - uint64_t smenq_next_sqb_vld : 1; - uint64_t smq_pend : 1; - uint64_t smq_next_sq_vld : 1; - uint64_t reserved_253_255 : 3; - uint64_t next_sqb : 64; /* W4 */ - uint64_t tail_sqb : 64; /* W5 */ - uint64_t smenq_sqb : 64; /* W6 */ - uint64_t smenq_next_sqb : 64; /* W7 */ - uint64_t head_sqb : 64; /* W8 */ + uint64_t smq_lso_segnum : 8; + uint64_t tail_offset : 6; + uint64_t smenq_offset : 6; + uint64_t head_offset : 6; + uint64_t smenq_next_sqb_vld : 1; + uint64_t smq_pend : 1; + uint64_t smq_next_sq_vld : 1; + uint64_t update_sq_count : 2; + uint64_t reserved_255_255 : 1; + uint64_t next_sqb : 64; /* W4 */ + uint64_t tail_sqb : 64; /* W5 */ + uint64_t smenq_sqb : 64; /* W6 */ + uint64_t smenq_next_sqb : 64; /* W7 */ + uint64_t head_sqb : 64; /* W8 */ uint64_t reserved_576_583 : 8; /* W9 */ uint64_t vfi_lso_total : 18; uint64_t vfi_lso_sizem1 : 3; @@ -2183,7 +2188,7 @@ struct nix_cn20k_sq_ctx_s { uint64_t vfi_lso_vld : 1; uint64_t reserved_630_639 : 10; uint64_t scm_lso_rem : 18; /* W10 */ - uint64_t reserved_658_703 : 46; + uint64_t sq_count_iova : 46; uint64_t octs : 48; /* W11 */ uint64_t reserved_752_767 : 16; uint64_t pkts : 48; /* W12 */ @@ -2193,7 +2198,7 @@ struct nix_cn20k_sq_ctx_s { uint64_t drop_octs : 48; /* W14 */ uint64_t reserved_944_959 : 16; uint64_t drop_pkts : 48; /* W15 */ - uint64_t reserved_1008_1023 : 16; + uint64_t seb_count : 16; }; /* [CN10K, .) NIX sq context hardware structure */ diff --git a/drivers/common/cnxk/roc_features.h b/drivers/common/cnxk/roc_features.h index 00e8b180f08..c7d2a73d990 100644 --- a/drivers/common/cnxk/roc_features.h +++ b/drivers/common/cnxk/roc_features.h @@ -125,4 +125,11 @@ roc_feature_nix_has_cpt_cq_support(void) { return roc_model_is_cn20k(); } + +static inline bool +roc_feature_nix_has_sq_cnt_update(void) +{ + return roc_model_is_cn20k(); +} + #endif diff --git a/drivers/common/cnxk/roc_nix.h b/drivers/common/cnxk/roc_nix.h index 4f8118d4e4e..0d5e524757d 100644 --- a/drivers/common/cnxk/roc_nix.h +++ b/drivers/common/cnxk/roc_nix.h @@ -493,6 +493,8 @@ struct roc_nix_sq { void *lmt_addr; void *sqe_mem; void *fc; + void *sq_cnt_ptr; + uint8_t update_sq_cnt; uint8_t tc; bool enable; }; @@ -1083,6 +1085,7 @@ int __roc_api roc_nix_sq_fini(struct roc_nix_sq *sq); int __roc_api roc_nix_sq_ena_dis(struct roc_nix_sq *sq, bool enable); void __roc_api roc_nix_sq_head_tail_get(struct roc_nix *roc_nix, uint16_t qid, uint32_t *head, uint32_t *tail); +int __roc_api roc_nix_sq_cnt_update(struct roc_nix_sq *sq, bool enable); /* PTP */ int __roc_api roc_nix_ptp_rx_ena_dis(struct roc_nix *roc_nix, int enable); diff --git a/drivers/common/cnxk/roc_nix_queue.c b/drivers/common/cnxk/roc_nix_queue.c index e19a6877e60..e59cd775388 100644 --- a/drivers/common/cnxk/roc_nix_queue.c +++ b/drivers/common/cnxk/roc_nix_queue.c @@ -1464,7 +1464,7 @@ sqb_pool_populate(struct roc_nix *roc_nix, struct roc_nix_sq *sq) if (roc_nix->sqb_slack) nb_sqb_bufs += roc_nix->sqb_slack; - else + else if (!sq->sq_cnt_ptr) nb_sqb_bufs += PLT_MAX((int)thr, (int)ROC_NIX_SQB_SLACK_DFLT); /* Explicitly set nat_align alone as by default pool is with both * nat_align and buf_offset = 1 which we don't want for SQB. 
@@ -1473,7 +1473,9 @@ sqb_pool_populate(struct roc_nix *roc_nix, struct roc_nix_sq *sq) pool.nat_align = 1; memset(&aura, 0, sizeof(aura)); - aura.fc_ena = 1; + /* Disable SQ pool FC updates when SQ count updates are used */ + if (!sq->sq_cnt_ptr) + aura.fc_ena = 1; if (roc_model_is_cn9k() || roc_errata_npa_has_no_fc_stype_ststp()) aura.fc_stype = 0x0; /* STF */ else @@ -1827,6 +1829,11 @@ sq_init(struct nix *nix, struct roc_nix_sq *sq, uint32_t rr_quantum, uint16_t sm aq->sq.sq_int_ena |= BIT(NIX_SQINT_SEND_ERR); aq->sq.sq_int_ena |= BIT(NIX_SQINT_MNQ_ERR); + /* HW atomic update of SQ count */ + if (sq->sq_cnt_ptr) { + aq->sq.sq_count_iova = ((uintptr_t)sq->sq_cnt_ptr) >> 3; + aq->sq.update_sq_count = sq->update_sq_cnt; + } /* Many to one reduction */ aq->sq.qint_idx = sq->qid % nix->qints; if (roc_errata_nix_assign_incorrect_qint()) { @@ -2133,3 +2140,62 @@ roc_nix_q_err_cb_unregister(struct roc_nix *roc_nix) dev->ops->q_err_cb = NULL; } + +int +roc_nix_sq_cnt_update(struct roc_nix_sq *sq, bool enable) +{ + struct nix *nix = roc_nix_to_nix_priv(sq->roc_nix); + struct mbox *mbox = mbox_get((&nix->dev)->mbox); + int64_t __plt_atomic *sq_cntm = (int64_t __plt_atomic *)sq->sq_cnt_ptr; + struct nix_cn20k_aq_enq_rsp *rsp; + struct nix_cn20k_aq_enq_req *aq; + int rc; + + aq = mbox_alloc_msg_nix_cn20k_aq_enq(mbox); + if (!aq) { + mbox_put(mbox); + return -ENOSPC; + } + + aq->qidx = sq->qid; + aq->ctype = NIX_AQ_CTYPE_SQ; + aq->op = NIX_AQ_INSTOP_READ; + rc = mbox_process_msg(mbox, (void *)&rsp); + if (rc) { + mbox_put(mbox); + return rc; + } + + /* Check if sq is already in same state */ + if ((enable && rsp->sq.update_sq_count) || (!enable && !rsp->sq.update_sq_count)) { + mbox_put(mbox); + return 0; + } + + /* Disable sq */ + aq = mbox_alloc_msg_nix_cn20k_aq_enq(mbox); + if (!aq) { + mbox_put(mbox); + return -ENOSPC; + } + + aq->qidx = sq->qid; + aq->ctype = NIX_AQ_CTYPE_SQ; + aq->op = NIX_AQ_INSTOP_WRITE; + aq->sq_mask.update_sq_count = ~aq->sq_mask.update_sq_count; + aq->sq.update_sq_count = enable; + if (enable) + aq->sq.update_sq_count = sq->update_sq_cnt; + rc = mbox_process(mbox); + if (rc) { + mbox_put(mbox); + return rc; + } + if (enable) + plt_atomic_store_explicit(sq_cntm, sq->nb_desc, plt_memory_order_relaxed); + else + plt_atomic_store_explicit(sq_cntm, 0, plt_memory_order_relaxed); + + mbox_put(mbox); + return 0; +} diff --git a/drivers/common/cnxk/roc_nix_tm.c b/drivers/common/cnxk/roc_nix_tm.c index 2771fd8fc4c..76c0f018849 100644 --- a/drivers/common/cnxk/roc_nix_tm.c +++ b/drivers/common/cnxk/roc_nix_tm.c @@ -601,7 +601,7 @@ roc_nix_tm_sq_flush_spin(struct roc_nix_sq *sq) /* SQ reached quiescent state */ if (sqb_cnt <= 1 && head_off == tail_off && - (*(volatile uint64_t *)sq->fc == sq->aura_sqb_bufs)) { + (sq->sq_cnt_ptr || (*(volatile uint64_t *)sq->fc == sq->aura_sqb_bufs))) { break; } diff --git a/drivers/common/cnxk/roc_nix_tm_ops.c b/drivers/common/cnxk/roc_nix_tm_ops.c index 951c310a560..09d014a276c 100644 --- a/drivers/common/cnxk/roc_nix_tm_ops.c +++ b/drivers/common/cnxk/roc_nix_tm_ops.c @@ -19,6 +19,12 @@ roc_nix_tm_sq_aura_fc(struct roc_nix_sq *sq, bool enable) plt_tm_dbg("Setting SQ %u SQB aura FC to %s", sq->qid, enable ? "enable" : "disable"); + /* For cn20K, enable/disable SQ count updates if the SQ count pointer + * was allocated based on the enable field. 
+ */ + if (sq->sq_cnt_ptr) + return roc_nix_sq_cnt_update(sq, enable); + lf = idev_npa_obj_get(); if (!lf) return NPA_ERR_DEVICE_NOT_BOUNDED; @@ -554,7 +560,7 @@ roc_nix_tm_hierarchy_disable(struct roc_nix *roc_nix) tail_off = (val >> 28) & 0x3F; if (sqb_cnt > 1 || head_off != tail_off || - (*(uint64_t *)sq->fc != sq->aura_sqb_bufs)) + (!sq->sq_cnt_ptr && (*(uint64_t *)sq->fc != sq->aura_sqb_bufs))) plt_err("Failed to gracefully flush sq %u", sq->qid); } diff --git a/drivers/common/cnxk/roc_platform.h b/drivers/common/cnxk/roc_platform.h index ff3a25e57f2..e22a50d47a4 100644 --- a/drivers/common/cnxk/roc_platform.h +++ b/drivers/common/cnxk/roc_platform.h @@ -212,6 +212,12 @@ plt_thread_is_valid(plt_thread_t thr) #define plt_io_rmb() rte_io_rmb() #define plt_atomic_thread_fence rte_atomic_thread_fence +#define plt_atomic_store_explicit rte_atomic_store_explicit +#define plt_atomic_load_explicit rte_atomic_load_explicit +#define plt_memory_order_release rte_memory_order_release +#define plt_memory_order_acquire rte_memory_order_acquire +#define plt_memory_order_relaxed rte_memory_order_relaxed + #define plt_bit_relaxed_get32 rte_bit_relaxed_get32 #define plt_bit_relaxed_set32 rte_bit_relaxed_set32 #define plt_bit_relaxed_clear32 rte_bit_relaxed_clear32 diff --git a/drivers/common/cnxk/roc_platform_base_symbols.c b/drivers/common/cnxk/roc_platform_base_symbols.c index cbb3fec5623..e51a4a111e6 100644 --- a/drivers/common/cnxk/roc_platform_base_symbols.c +++ b/drivers/common/cnxk/roc_platform_base_symbols.c @@ -365,6 +365,7 @@ RTE_EXPORT_INTERNAL_SYMBOL(roc_nix_rss_reta_get) RTE_EXPORT_INTERNAL_SYMBOL(roc_nix_rss_flowkey_set) RTE_EXPORT_INTERNAL_SYMBOL(roc_nix_rss_default_setup) RTE_EXPORT_INTERNAL_SYMBOL(roc_nix_num_xstats_get) +RTE_EXPORT_INTERNAL_SYMBOL(roc_nix_sq_cnt_update) RTE_EXPORT_INTERNAL_SYMBOL(roc_nix_stats_get) RTE_EXPORT_INTERNAL_SYMBOL(roc_nix_stats_reset) RTE_EXPORT_INTERNAL_SYMBOL(roc_nix_stats_queue_get) From ba7254a8d1a608e6e14f5a733bf4983eb38c115a Mon Sep 17 00:00:00 2001 From: Rakesh Kudurumalla Date: Thu, 13 Nov 2025 10:07:55 +0530 Subject: [PATCH 33/99] common/cnxk: check 16B alignment Added function to check whether board supports 16B alignment Signed-off-by: Rakesh Kudurumalla --- drivers/common/cnxk/roc_features.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/common/cnxk/roc_features.h b/drivers/common/cnxk/roc_features.h index c7d2a73d990..9c6adfe0589 100644 --- a/drivers/common/cnxk/roc_features.h +++ b/drivers/common/cnxk/roc_features.h @@ -132,4 +132,9 @@ roc_feature_nix_has_sq_cnt_update(void) return roc_model_is_cn20k(); } +static inline bool +roc_feature_nix_has_16b_align(void) +{ + return roc_model_is_cn20k(); +} #endif From bf9a33976460e6de1cb1479d4f0b9bbee6ee1466 Mon Sep 17 00:00:00 2001 From: Sunil Kumar Kori Date: Thu, 13 Nov 2025 10:07:56 +0530 Subject: [PATCH 34/99] common/cnxk: configure back pressure on pool On CN20K SoC, back pressure can be configured for eight different traffic classes per pool along with threshold and BPIDs. RoC API is added to configure the same. 
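Usage of the new API, following the prototype added in this patch (a hedged sketch; the BPID, threshold, and class values are placeholders): enable back pressure for the first four traffic classes on one pool, then later release a single class.

```
#include <stdbool.h>
#include <stdint.h>

#include "roc_api.h" /* roc_npa_pool_bp_configure() prototype */

static int
pool_bp_example(uint64_t pool_handle, uint16_t bpid, uint8_t thresh)
{
	uint8_t tc;
	int rc;

	for (tc = 0; tc < 4; tc++) {
		rc = roc_npa_pool_bp_configure(pool_handle, bpid, thresh,
					       tc /* bp_class */, true);
		if (rc)
			return rc;
	}
	/* Later: stop asserting back pressure for TC 2 only. */
	return roc_npa_pool_bp_configure(pool_handle, 0, 0, 2, false);
}
```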
Signed-off-by: Sunil Kumar Kori --- drivers/common/cnxk/roc_nix.h | 1 + drivers/common/cnxk/roc_nix_fc.c | 60 +++++++ drivers/common/cnxk/roc_npa.c | 155 +++++++++++++++++- drivers/common/cnxk/roc_npa.h | 2 + drivers/common/cnxk/roc_npa_priv.h | 5 +- .../common/cnxk/roc_platform_base_symbols.c | 1 + 6 files changed, 222 insertions(+), 2 deletions(-) diff --git a/drivers/common/cnxk/roc_nix.h b/drivers/common/cnxk/roc_nix.h index 0d5e524757d..0979dc415c2 100644 --- a/drivers/common/cnxk/roc_nix.h +++ b/drivers/common/cnxk/roc_nix.h @@ -600,6 +600,7 @@ struct roc_nix { uint16_t rep_cnt; uint16_t rep_pfvf_map[MAX_PFVF_REP]; bool reass_ena; + bool use_multi_bpids; TAILQ_ENTRY(roc_nix) next; #define ROC_NIX_MEM_SZ (6 * 1131) diff --git a/drivers/common/cnxk/roc_nix_fc.c b/drivers/common/cnxk/roc_nix_fc.c index e35c993f965..ddabd15a5d1 100644 --- a/drivers/common/cnxk/roc_nix_fc.c +++ b/drivers/common/cnxk/roc_nix_fc.c @@ -549,6 +549,61 @@ nix_rx_chan_multi_bpid_cfg(struct roc_nix *roc_nix, uint8_t chan, uint16_t bpid, #define NIX_BPID_INVALID 0xFFFF +static void +nix_fc_npa_multi_bp_cfg(struct roc_nix *roc_nix, uint64_t pool_handle, uint8_t ena, uint8_t force, + uint8_t tc, uint64_t drop_percent) +{ + uint32_t pool_id = roc_npa_aura_handle_to_aura(pool_handle); + struct nix *nix = roc_nix_to_nix_priv(roc_nix); + struct npa_lf *lf = idev_npa_obj_get(); + struct npa_aura_attr *aura_attr; + uint8_t bp_thresh, bp_ena; + uint16_t bpid; + int i; + + if (!lf) + return; + + aura_attr = &lf->aura_attr[pool_id]; + + bp_thresh = NIX_RQ_AURA_BP_THRESH(drop_percent, aura_attr->limit, aura_attr->shift); + bpid = aura_attr->nix0_bpid; + bp_ena = aura_attr->bp_ena; + + /* BP is already enabled. */ + if ((bp_ena & (0x1 << tc)) && ena) { + if (bp_thresh != aura_attr->bp_thresh[tc]) { + if (roc_npa_pool_bp_configure(pool_id, nix->bpid[0], bp_thresh, tc, true)) + plt_err("Enabling backpressue failed on pool 0x%" PRIx32, pool_id); + } else { + aura_attr->ref_count++; + } + + return; + } + + if (ena) { + if (roc_npa_pool_bp_configure(pool_id, nix->bpid[0], bp_thresh, tc, true)) + plt_err("Enabling backpressue failed on pool 0x%" PRIx32, pool_id); + else + aura_attr->ref_count++; + } else { + bool found = !!force; + + /* Don't disable if existing BPID is not within this port's list */ + for (i = 0; i < nix->chan_cnt; i++) + if (bpid == nix->bpid[i]) + found = true; + if (!found) + return; + else if ((aura_attr->ref_count > 0) && --(aura_attr->ref_count)) + return; + + if (roc_npa_pool_bp_configure(pool_id, 0, 0, 0, false)) + plt_err("Disabling backpressue failed on pool 0x%" PRIx32, pool_id); + } +} + void roc_nix_fc_npa_bp_cfg(struct roc_nix *roc_nix, uint64_t pool_id, uint8_t ena, uint8_t force, uint8_t tc, uint64_t drop_percent) @@ -567,6 +622,11 @@ roc_nix_fc_npa_bp_cfg(struct roc_nix *roc_nix, uint64_t pool_id, uint8_t ena, ui if (!lf) return; + if (roc_model_is_cn20k() && roc_nix->use_multi_bpids) { + nix_fc_npa_multi_bp_cfg(roc_nix, pool_id, ena, force, tc, drop_percent); + return; + } + aura_attr = &lf->aura_attr[aura_id]; bp_intf = 1 << nix->is_nix1; diff --git a/drivers/common/cnxk/roc_npa.c b/drivers/common/cnxk/roc_npa.c index d5ebfbfc11f..f9824f66560 100644 --- a/drivers/common/cnxk/roc_npa.c +++ b/drivers/common/cnxk/roc_npa.c @@ -172,10 +172,47 @@ npa_aura_init(struct mbox *m_box, uint32_t aura_id, struct npa_aura_s *aura) return rc; } +static inline void +npa_pool_multi_bp_reset(struct npa_cn20k_aq_enq_req *pool_req) +{ + pool_req->pool.bp_0 = 0; + pool_req->pool.bp_1 = 0; + pool_req->pool.bp_2 = 0; + 
pool_req->pool.bp_3 = 0; + pool_req->pool.bp_4 = 0; + pool_req->pool.bp_5 = 0; + pool_req->pool.bp_6 = 0; + pool_req->pool.bp_7 = 0; + pool_req->pool.bp_ena_0 = 0; + pool_req->pool.bp_ena_1 = 0; + pool_req->pool.bp_ena_2 = 0; + pool_req->pool.bp_ena_3 = 0; + pool_req->pool.bp_ena_4 = 0; + pool_req->pool.bp_ena_5 = 0; + pool_req->pool.bp_ena_6 = 0; + pool_req->pool.bp_ena_7 = 0; + pool_req->pool_mask.bp_0 = ~(pool_req->pool_mask.bp_0); + pool_req->pool_mask.bp_1 = ~(pool_req->pool_mask.bp_1); + pool_req->pool_mask.bp_2 = ~(pool_req->pool_mask.bp_2); + pool_req->pool_mask.bp_3 = ~(pool_req->pool_mask.bp_3); + pool_req->pool_mask.bp_4 = ~(pool_req->pool_mask.bp_4); + pool_req->pool_mask.bp_5 = ~(pool_req->pool_mask.bp_5); + pool_req->pool_mask.bp_6 = ~(pool_req->pool_mask.bp_6); + pool_req->pool_mask.bp_7 = ~(pool_req->pool_mask.bp_7); + pool_req->pool_mask.bp_ena_0 = ~(pool_req->pool_mask.bp_ena_0); + pool_req->pool_mask.bp_ena_1 = ~(pool_req->pool_mask.bp_ena_1); + pool_req->pool_mask.bp_ena_2 = ~(pool_req->pool_mask.bp_ena_2); + pool_req->pool_mask.bp_ena_3 = ~(pool_req->pool_mask.bp_ena_3); + pool_req->pool_mask.bp_ena_4 = ~(pool_req->pool_mask.bp_ena_4); + pool_req->pool_mask.bp_ena_5 = ~(pool_req->pool_mask.bp_ena_5); + pool_req->pool_mask.bp_ena_6 = ~(pool_req->pool_mask.bp_ena_6); + pool_req->pool_mask.bp_ena_7 = ~(pool_req->pool_mask.bp_ena_7); +} + static int npa_aura_pool_fini(struct mbox *m_box, uint32_t aura_id, uint64_t aura_handle) { - struct npa_cn20k_aq_enq_req *aura_req_cn20k, *pool_req_cn20k; + struct npa_cn20k_aq_enq_req *aura_req_cn20k, *pool_req_cn20k = NULL; struct npa_aq_enq_req *aura_req, *pool_req; struct npa_aq_enq_rsp *aura_rsp, *pool_rsp; struct mbox_dev *mdev = &m_box->dev[0]; @@ -201,6 +238,10 @@ npa_aura_pool_fini(struct mbox *m_box, uint32_t aura_id, uint64_t aura_handle) } if (pool_req == NULL) goto exit; + + /* Disable backpressure on pool on CN20K */ + if (roc_model_is_cn20k()) + npa_pool_multi_bp_reset(pool_req_cn20k); pool_req->aura_id = aura_id; pool_req->ctype = NPA_AQ_CTYPE_POOL; pool_req->op = NPA_AQ_INSTOP_WRITE; @@ -983,6 +1024,118 @@ roc_npa_zero_aura_handle(void) return 0; } +int +roc_npa_pool_bp_configure(uint64_t aura_handle, uint16_t bpid, uint8_t bp_thresh, uint8_t bp_class, + bool enable) +{ + uint32_t pool_id = roc_npa_aura_handle_to_aura(aura_handle); + struct npa_lf *lf = idev_npa_obj_get(); + struct npa_cn20k_aq_enq_req *aq; + uint8_t bp, bp_ena; + struct mbox *mbox; + int rc = 0; + + plt_npa_dbg("Setting BPID %u BP_CLASS %u enable %u on pool %" PRIx64, bpid, bp_class, + bp_thresh, aura_handle); + + if (lf == NULL) + return NPA_ERR_PARAM; + + mbox = mbox_get(lf->mbox); + aq = mbox_alloc_msg_npa_cn20k_aq_enq(mbox); + if (aq == NULL) { + rc = -ENOSPC; + goto fail; + } + + aq->aura_id = pool_id; + aq->ctype = NPA_AQ_CTYPE_POOL; + aq->op = NPA_AQ_INSTOP_WRITE; + + if (enable) { + aq->pool.bpid_0 = bpid; + aq->pool_mask.bpid_0 = ~(aq->pool_mask.bpid_0); + + bp = bp_thresh; + } else { + bp = 0; + } + + switch (bp_class) { + case 0: + aq->pool.bp_0 = bp; + aq->pool_mask.bp_0 = ~(aq->pool_mask.bp_0); + aq->pool.bp_ena_0 = enable; + aq->pool_mask.bp_ena_0 = ~(aq->pool_mask.bp_ena_0); + break; + case 1: + aq->pool.bp_1 = bp; + aq->pool_mask.bp_1 = ~(aq->pool_mask.bp_1); + aq->pool.bp_ena_1 = enable; + aq->pool_mask.bp_ena_1 = ~(aq->pool_mask.bp_ena_1); + break; + case 2: + aq->pool.bp_2 = bp; + aq->pool_mask.bp_2 = ~(aq->pool_mask.bp_2); + aq->pool.bp_ena_2 = enable; + aq->pool_mask.bp_ena_2 = ~(aq->pool_mask.bp_ena_2); + break; + case 3: + 
aq->pool.bp_3 = bp; + aq->pool_mask.bp_3 = ~(aq->pool_mask.bp_3); + aq->pool.bp_ena_3 = enable; + aq->pool_mask.bp_ena_3 = ~(aq->pool_mask.bp_ena_3); + break; + case 4: + aq->pool.bp_4 = bp; + aq->pool_mask.bp_4 = ~(aq->pool_mask.bp_4); + aq->pool.bp_ena_4 = enable; + aq->pool_mask.bp_ena_4 = ~(aq->pool_mask.bp_ena_4); + break; + case 5: + aq->pool.bp_5 = bp; + aq->pool_mask.bp_5 = ~(aq->pool_mask.bp_5); + aq->pool.bp_ena_5 = enable; + aq->pool_mask.bp_ena_5 = ~(aq->pool_mask.bp_ena_5); + break; + case 6: + aq->pool.bp_6 = bp; + aq->pool_mask.bp_6 = ~(aq->pool_mask.bp_6); + aq->pool.bp_ena_6 = enable; + aq->pool_mask.bp_ena_6 = ~(aq->pool_mask.bp_ena_6); + break; + case 7: + aq->pool.bp_7 = bp; + aq->pool_mask.bp_7 = ~(aq->pool_mask.bp_7); + aq->pool.bp_ena_7 = enable; + aq->pool_mask.bp_ena_7 = ~(aq->pool_mask.bp_ena_7); + break; + default: + rc = -EINVAL; + goto fail; + } + + rc = mbox_process(mbox); + if (rc) + goto fail; + + bp_ena = lf->aura_attr[pool_id].bp_ena; + bp_ena &= ~(1 << bp_class); + bp_ena |= (enable << bp_class); + + if (enable && !lf->aura_attr[pool_id].bp_ena) + lf->aura_attr[pool_id].nix0_bpid = bpid; + else if (!enable && !lf->aura_attr[pool_id].bp_ena) + lf->aura_attr[pool_id].nix0_bpid = 0; + + lf->aura_attr[pool_id].bp_ena = bp_ena; + lf->aura_attr[pool_id].bp_thresh[bp_class] = bp; + +fail: + mbox_put(mbox); + return rc; +} + int roc_npa_aura_bp_configure(uint64_t aura_handle, uint16_t bpid, uint8_t bp_intf, uint8_t bp_thresh, bool enable) diff --git a/drivers/common/cnxk/roc_npa.h b/drivers/common/cnxk/roc_npa.h index 853c0fed43f..336a43f95cb 100644 --- a/drivers/common/cnxk/roc_npa.h +++ b/drivers/common/cnxk/roc_npa.h @@ -816,6 +816,8 @@ uint64_t __roc_api roc_npa_buf_type_mask(uint64_t aura_handle); uint64_t __roc_api roc_npa_buf_type_limit_get(uint64_t type_mask); int __roc_api roc_npa_aura_bp_configure(uint64_t aura_id, uint16_t bpid, uint8_t bp_intf, uint8_t bp_thresh, bool enable); +int __roc_api roc_npa_pool_bp_configure(uint64_t pool_id, uint16_t bpid, uint8_t bp_thresh, + uint8_t bp_class, bool enable); /* Init callbacks */ typedef int (*roc_npa_lf_init_cb_t)(struct plt_pci_device *pci_dev); diff --git a/drivers/common/cnxk/roc_npa_priv.h b/drivers/common/cnxk/roc_npa_priv.h index 060df9ab049..0223e4a4382 100644 --- a/drivers/common/cnxk/roc_npa_priv.h +++ b/drivers/common/cnxk/roc_npa_priv.h @@ -55,7 +55,10 @@ struct npa_aura_attr { uint64_t shift; uint64_t limit; uint8_t bp_ena; - uint8_t bp; + union { + uint8_t bp; /* CN9K, CN10K */ + uint8_t bp_thresh[8]; /* CN20K */ + }; }; struct dev; diff --git a/drivers/common/cnxk/roc_platform_base_symbols.c b/drivers/common/cnxk/roc_platform_base_symbols.c index e51a4a111e6..fee4b4d6889 100644 --- a/drivers/common/cnxk/roc_platform_base_symbols.c +++ b/drivers/common/cnxk/roc_platform_base_symbols.c @@ -431,6 +431,7 @@ RTE_EXPORT_INTERNAL_SYMBOL(roc_npa_aura_op_range_set) RTE_EXPORT_INTERNAL_SYMBOL(roc_npa_aura_op_range_get) RTE_EXPORT_INTERNAL_SYMBOL(roc_npa_pool_op_pc_reset) RTE_EXPORT_INTERNAL_SYMBOL(roc_npa_aura_drop_set) +RTE_EXPORT_INTERNAL_SYMBOL(roc_npa_pool_bp_configure) RTE_EXPORT_INTERNAL_SYMBOL(roc_npa_pool_create) RTE_EXPORT_INTERNAL_SYMBOL(roc_npa_aura_create) RTE_EXPORT_INTERNAL_SYMBOL(roc_npa_aura_limit_modify) From f3c15bb6062475a62451c62ca2c5c9a5e12706c8 Mon Sep 17 00:00:00 2001 From: Sunil Kumar Kori Date: Thu, 13 Nov 2025 10:07:57 +0530 Subject: [PATCH 35/99] common/cnxk: fix max number of SQB buffers in clean up By default, SQB pool is created with max (512 buffers) + extra threshold 
buffers, and the aura limit is set to 512 + thr. But during cleanup,
the aura limit is reset to the max (512 buffers) only, before the pool
is destroyed. Hence, while destroying the pool, only 512 buffers are
drained from the aura and the extra threshold buffers are left behind.
If the same SQB pool is created again later, the hardware reports an
error for the extra threshold buffers because they are already in the
pool.

Fixes: 780f90e951a5 ("common/cnxk: restore NIX SQB pool limit before destroy")
Cc: stable@dpdk.org

Signed-off-by: Sunil Kumar Kori
---
 drivers/common/cnxk/roc_nix_queue.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/common/cnxk/roc_nix_queue.c b/drivers/common/cnxk/roc_nix_queue.c
index e59cd775388..1de6e51c5ef 100644
--- a/drivers/common/cnxk/roc_nix_queue.c
+++ b/drivers/common/cnxk/roc_nix_queue.c
@@ -2057,7 +2057,7 @@ roc_nix_sq_fini(struct roc_nix_sq *sq)
 	/* Restore limit to max SQB count that the pool was created
 	 * for aura drain to succeed.
 	 */
-	roc_npa_aura_limit_modify(sq->aura_handle, NIX_MAX_SQB);
+	roc_npa_aura_limit_modify(sq->aura_handle, sq->aura_sqb_bufs);
 	rc |= roc_npa_pool_destroy(sq->aura_handle);
 	plt_free(sq->fc);
 	plt_free(sq->sqe_mem);

From c993d8fd5a3391197cf5104bf3d0a378e1268ba1 Mon Sep 17 00:00:00 2001
From: Nithin Dabilpuram
Date: Thu, 13 Nov 2025 10:07:58 +0530
Subject: [PATCH 36/99] common/cnxk: support SQ resize

Add support for SQ resize by allocating SQB memory in chunks of SQB
size (a usage sketch follows the diff below).

Signed-off-by: Nithin Dabilpuram
---
 drivers/common/cnxk/roc_nix.h               |   3 +
 drivers/common/cnxk/roc_nix_queue.c         | 389 ++++++++++++++++--
 .../common/cnxk/roc_platform_base_symbols.c |   1 +
 3 files changed, 366 insertions(+), 27 deletions(-)

diff --git a/drivers/common/cnxk/roc_nix.h b/drivers/common/cnxk/roc_nix.h
index 0979dc415c2..326decde2b9 100644
--- a/drivers/common/cnxk/roc_nix.h
+++ b/drivers/common/cnxk/roc_nix.h
@@ -483,6 +483,7 @@ struct roc_nix_sq {
 	bool cq_ena;
 	uint8_t fc_hyst_bits;
 	/* End of Input parameters */
+	uint16_t sqes_per_sqb;
 	uint16_t sqes_per_sqb_log2;
 	struct roc_nix *roc_nix;
 	uint64_t aura_handle;
@@ -588,6 +589,7 @@ struct roc_nix {
 	uint16_t inb_cfg_param2;
 	bool force_tail_drop;
 	bool dis_xqe_drop;
+	bool sq_resize_ena;
 	/* End of input parameters */
 	/* LMT line base for "Per Core Tx LMT line" mode*/
 	uintptr_t lmt_base;
@@ -1087,6 +1089,7 @@ int __roc_api roc_nix_sq_ena_dis(struct roc_nix_sq *sq, bool enable);
 void __roc_api roc_nix_sq_head_tail_get(struct roc_nix *roc_nix, uint16_t qid, uint32_t *head,
 					uint32_t *tail);
 int __roc_api roc_nix_sq_cnt_update(struct roc_nix_sq *sq, bool enable);
+int __roc_api roc_nix_sq_resize(struct roc_nix_sq *sq, uint32_t nb_desc);

 /* PTP */
 int __roc_api roc_nix_ptp_rx_ena_dis(struct roc_nix *roc_nix, int enable);
diff --git a/drivers/common/cnxk/roc_nix_queue.c b/drivers/common/cnxk/roc_nix_queue.c
index 1de6e51c5ef..2c7fce10671 100644
--- a/drivers/common/cnxk/roc_nix_queue.c
+++ b/drivers/common/cnxk/roc_nix_queue.c
@@ -1430,42 +1430,77 @@ roc_nix_cq_fini(struct roc_nix_cq *cq)
 	return 0;
 }

-static int
-sqb_pool_populate(struct roc_nix *roc_nix, struct roc_nix_sq *sq)
+static uint16_t
+sqes_per_sqb_calc(uint16_t sqb_size, enum roc_nix_sq_max_sqe_sz max_sqe_sz)
 {
-	struct nix *nix = roc_nix_to_nix_priv(roc_nix);
-	uint16_t sqes_per_sqb, count, nb_sqb_bufs, thr;
-	struct npa_pool_s pool;
-	struct npa_aura_s aura;
-	uint64_t blk_sz;
-	uint64_t iova;
-	int rc;
+	uint16_t sqes_per_sqb;

-	blk_sz = nix->sqb_size;
-	if (sq->max_sqe_sz == roc_nix_maxsqesz_w16)
-		sqes_per_sqb = (blk_sz / 8) / 16;
+	if (max_sqe_sz ==
roc_nix_maxsqesz_w16) + sqes_per_sqb = (sqb_size / 8) / 16; else - sqes_per_sqb = (blk_sz / 8) / 8; + sqes_per_sqb = (sqb_size / 8) / 8; /* Reserve One SQE in each SQB to hold pointer for next SQB */ sqes_per_sqb -= 1; + return sqes_per_sqb; +} + +static uint16_t +sq_desc_to_sqb(struct nix *nix, uint16_t sqes_per_sqb, uint32_t nb_desc) +{ + struct roc_nix *roc_nix = nix_priv_to_roc_nix(nix); + uint16_t nb_sqb_bufs; + + nb_desc = PLT_MAX(512U, nb_desc); + nb_sqb_bufs = PLT_DIV_CEIL(nb_desc, sqes_per_sqb); - sq->nb_desc = PLT_MAX(512U, sq->nb_desc); - nb_sqb_bufs = PLT_DIV_CEIL(sq->nb_desc, sqes_per_sqb); - thr = PLT_DIV_CEIL((nb_sqb_bufs * ROC_NIX_SQB_THRESH), 100); nb_sqb_bufs += NIX_SQB_PREFETCH; /* Clamp up the SQB count */ nb_sqb_bufs = PLT_MAX(NIX_DEF_SQB, nb_sqb_bufs); nb_sqb_bufs = PLT_MIN(roc_nix->max_sqb_count, (uint16_t)nb_sqb_bufs); - sq->nb_sqb_bufs = nb_sqb_bufs; - sq->sqes_per_sqb_log2 = (uint16_t)plt_log2_u32(sqes_per_sqb); - sq->nb_sqb_bufs_adj = nb_sqb_bufs; + return nb_sqb_bufs; +} + +static uint16_t +sqb_slack_adjust(struct nix *nix, uint16_t nb_sqb_bufs, bool sq_cnt_ena) +{ + struct roc_nix *roc_nix = nix_priv_to_roc_nix(nix); + uint16_t thr; + thr = PLT_DIV_CEIL((nb_sqb_bufs * ROC_NIX_SQB_THRESH), 100); if (roc_nix->sqb_slack) nb_sqb_bufs += roc_nix->sqb_slack; - else if (!sq->sq_cnt_ptr) + else if (!sq_cnt_ena) nb_sqb_bufs += PLT_MAX((int)thr, (int)ROC_NIX_SQB_SLACK_DFLT); + return nb_sqb_bufs; +} + +static int +sqb_pool_populate(struct roc_nix *roc_nix, struct roc_nix_sq *sq) +{ + struct nix *nix = roc_nix_to_nix_priv(roc_nix); + uint16_t sqes_per_sqb, count, nb_sqb_bufs; + struct npa_pool_s pool; + struct npa_aura_s aura; + uint64_t blk_sz; + uint64_t iova; + int rc; + + blk_sz = nix->sqb_size; + sqes_per_sqb = sqes_per_sqb_calc(blk_sz, sq->max_sqe_sz); + + /* Translate desc count to SQB count */ + nb_sqb_bufs = sq_desc_to_sqb(nix, sqes_per_sqb, sq->nb_desc); + + sq->sqes_per_sqb = sqes_per_sqb; + sq->sqes_per_sqb_log2 = (uint16_t)plt_log2_u32(sqes_per_sqb); + sq->nb_sqb_bufs_adj = nb_sqb_bufs; + sq->nb_sqb_bufs = nb_sqb_bufs; + + /* Add slack to SQB's */ + nb_sqb_bufs = sqb_slack_adjust(nix, nb_sqb_bufs, !!sq->sq_cnt_ptr); + /* Explicitly set nat_align alone as by default pool is with both * nat_align and buf_offset = 1 which we don't want for SQB. */ @@ -1520,6 +1555,96 @@ sqb_pool_populate(struct roc_nix *roc_nix, struct roc_nix_sq *sq) return rc; } +static int +sqb_pool_dyn_populate(struct roc_nix *roc_nix, struct roc_nix_sq *sq) +{ + struct nix *nix = roc_nix_to_nix_priv(roc_nix); + uint16_t count, nb_sqb_bufs; + uint16_t max_sqb_count; + struct npa_pool_s pool; + struct npa_aura_s aura; + uint16_t sqes_per_sqb; + uint64_t blk_sz; + uint64_t iova; + int rc; + + blk_sz = nix->sqb_size; + sqes_per_sqb = sqes_per_sqb_calc(blk_sz, sq->max_sqe_sz); + + /* Translate desc count to SQB count */ + nb_sqb_bufs = sq_desc_to_sqb(nix, sqes_per_sqb, sq->nb_desc); + + sq->sqes_per_sqb_log2 = (uint16_t)plt_log2_u32(sqes_per_sqb); + sq->sqes_per_sqb = sqes_per_sqb; + sq->nb_sqb_bufs_adj = nb_sqb_bufs; + sq->nb_sqb_bufs = nb_sqb_bufs; + + /* Add slack to SQB's */ + nb_sqb_bufs = sqb_slack_adjust(nix, nb_sqb_bufs, !!sq->sq_cnt_ptr); + + /* Explicitly set nat_align alone as by default pool is with both + * nat_align and buf_offset = 1 which we don't want for SQB. 
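+	 * In this dynamic variant each SQB is instead allocated
+	 * individually below via plt_zmalloc(blk_sz, ROC_ALIGN), so
+	 * pool-level natural alignment is not relied on (nat_align is
+	 * set to 0 rather than 1).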
+ */ + memset(&pool, 0, sizeof(struct npa_pool_s)); + pool.nat_align = 0; + + memset(&aura, 0, sizeof(aura)); + if (!sq->sq_cnt_ptr) + aura.fc_ena = 1; + if (roc_model_is_cn9k() || roc_errata_npa_has_no_fc_stype_ststp()) + aura.fc_stype = 0x0; /* STF */ + else + aura.fc_stype = 0x3; /* STSTP */ + aura.fc_addr = (uint64_t)sq->fc; + aura.fc_hyst_bits = sq->fc_hyst_bits & 0xF; + max_sqb_count = sqb_slack_adjust(nix, roc_nix->max_sqb_count, false); + rc = roc_npa_pool_create(&sq->aura_handle, blk_sz, max_sqb_count, &aura, &pool, 0); + if (rc) + goto fail; + + roc_npa_buf_type_update(sq->aura_handle, ROC_NPA_BUF_TYPE_SQB, 1); + roc_npa_aura_op_cnt_set(sq->aura_handle, 0, nb_sqb_bufs); + + /* Fill the initial buffers */ + for (count = 0; count < nb_sqb_bufs; count++) { + iova = (uint64_t)plt_zmalloc(blk_sz, ROC_ALIGN); + if (!iova) { + rc = -ENOMEM; + goto nomem; + } + plt_io_wmb(); + + roc_npa_aura_op_free(sq->aura_handle, 0, iova); + } + + if (roc_npa_aura_op_available_wait(sq->aura_handle, nb_sqb_bufs, 0) != nb_sqb_bufs) { + plt_err("Failed to free all pointers to the pool"); + rc = NIX_ERR_NO_MEM; + goto npa_fail; + } + + /* Update aura count */ + roc_npa_aura_limit_modify(sq->aura_handle, nb_sqb_bufs); + roc_npa_pool_op_range_set(sq->aura_handle, 0, UINT64_MAX); + sq->aura_sqb_bufs = nb_sqb_bufs; + + return rc; +npa_fail: +nomem: + while (count) { + iova = roc_npa_aura_op_alloc(sq->aura_handle, 0); + if (!iova) + break; + plt_free((uint64_t *)iova); + count--; + } + if (count) + plt_err("Failed to recover %u SQB's", count); + roc_npa_pool_destroy(sq->aura_handle); +fail: + return rc; +} + static int sq_cn9k_init(struct nix *nix, struct roc_nix_sq *sq, uint32_t rr_quantum, uint16_t smq) @@ -1768,10 +1893,10 @@ sq_cn10k_fini(struct nix *nix, struct roc_nix_sq *sq) return rc; } - if (aq->sq.smq_pend) + if (rsp->sq.smq_pend) plt_err("SQ has pending SQE's"); - count = aq->sq.sqb_count; + count = rsp->sq.sqb_count; sqes_per_sqb = 1 << sq->sqes_per_sqb_log2; /* Free SQB's that are used */ sqb_buf = (void *)rsp->sq.head_sqb; @@ -1939,6 +2064,7 @@ int roc_nix_sq_init(struct roc_nix *roc_nix, struct roc_nix_sq *sq) { struct nix *nix = roc_nix_to_nix_priv(roc_nix); + bool sq_resize_ena = roc_nix->sq_resize_ena; struct mbox *m_box = (&nix->dev)->mbox; uint16_t qid, smq = UINT16_MAX; uint32_t rr_quantum = 0; @@ -1964,7 +2090,10 @@ roc_nix_sq_init(struct roc_nix *roc_nix, struct roc_nix_sq *sq) goto fail; } - rc = sqb_pool_populate(roc_nix, sq); + if (sq_resize_ena) + rc = sqb_pool_dyn_populate(roc_nix, sq); + else + rc = sqb_pool_populate(roc_nix, sq); if (rc) goto nomem; @@ -2014,19 +2143,38 @@ roc_nix_sq_init(struct roc_nix *roc_nix, struct roc_nix_sq *sq) return rc; } +static void +nix_sqb_mem_dyn_free(uint64_t aura_handle, uint16_t count) +{ + uint64_t iova; + + /* Recover SQB's and free them back */ + while (count) { + iova = roc_npa_aura_op_alloc(aura_handle, 0); + if (!iova) + break; + plt_free((uint64_t *)iova); + count--; + } + if (count) + plt_err("Failed to recover %u SQB's", count); +} + int roc_nix_sq_fini(struct roc_nix_sq *sq) { - struct nix *nix; - struct mbox *mbox; + struct roc_nix *roc_nix = sq->roc_nix; + bool sq_resize_ena = roc_nix->sq_resize_ena; struct ndc_sync_op *ndc_req; + struct mbox *mbox; + struct nix *nix; uint16_t qid; int rc = 0; if (sq == NULL) return NIX_ERR_PARAM; - nix = roc_nix_to_nix_priv(sq->roc_nix); + nix = roc_nix_to_nix_priv(roc_nix); mbox = (&nix->dev)->mbox; qid = sq->qid; @@ -2058,14 +2206,201 @@ roc_nix_sq_fini(struct roc_nix_sq *sq) * for aura drain to 
succeed. */ roc_npa_aura_limit_modify(sq->aura_handle, sq->aura_sqb_bufs); + + if (sq_resize_ena) + nix_sqb_mem_dyn_free(sq->aura_handle, sq->aura_sqb_bufs); + rc |= roc_npa_pool_destroy(sq->aura_handle); plt_free(sq->fc); - plt_free(sq->sqe_mem); + if (!sq_resize_ena) + plt_free(sq->sqe_mem); nix->sqs[qid] = NULL; return rc; } +static int +sqb_aura_dyn_expand(struct roc_nix_sq *sq, uint16_t count) +{ + struct nix *nix = roc_nix_to_nix_priv(sq->roc_nix); + uint64_t *sqbs = NULL; + uint16_t blk_sz; + int i; + + blk_sz = nix->sqb_size; + sqbs = calloc(1, count * sizeof(uint64_t *)); + if (!sqbs) + return -ENOMEM; + + for (i = 0; i < count; i++) { + sqbs[i] = (uint64_t)plt_zmalloc(blk_sz, ROC_ALIGN); + if (!sqbs[i]) + break; + } + + if (i != count) { + i = i - 1; + for (; i >= 0; i--) + plt_free((void *)sqbs[i]); + free(sqbs); + return -ENOMEM; + } + + plt_io_wmb(); + + /* Add new buffers to sqb aura */ + for (i = 0; i < count; i++) + roc_npa_aura_op_free(sq->aura_handle, 0, sqbs[i]); + free(sqbs); + + /* Adjust SQ info */ + sq->nb_sqb_bufs += count; + sq->nb_sqb_bufs_adj += count; + sq->aura_sqb_bufs += count; + return 0; +} + +static int +sqb_aura_dyn_contract(struct roc_nix_sq *sq, uint16_t count) +{ + struct nix *nix = roc_nix_to_nix_priv(sq->roc_nix); + struct dev *dev = &nix->dev; + struct ndc_sync_op *ndc_req; + uint64_t *sqbs = NULL; + struct mbox *mbox; + uint64_t timeout; /* 10's of usec */ + uint64_t cycles; + int i, rc; + + mbox = dev->mbox; + /* Sync NDC-NIX-TX for LF */ + ndc_req = mbox_alloc_msg_ndc_sync_op(mbox_get(mbox)); + if (ndc_req == NULL) { + mbox_put(mbox); + return -EFAULT; + } + + ndc_req->nix_lf_tx_sync = 1; + rc = mbox_process(mbox); + if (rc) { + mbox_put(mbox); + return rc; + } + mbox_put(mbox); + + /* Wait for enough time based on shaper min rate */ + timeout = (sq->nb_desc * roc_nix_max_pkt_len(sq->roc_nix) * 8 * 1E5); + /* Wait for worst case scenario of this SQ being last priority + * and so have to wait for all other SQ's drain out by their own. 
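+	 * The arithmetic: nb_desc * max_pkt_len * 8 is the worst-case
+	 * number of bits queued on this SQ; multiplying by nb_tx_queues
+	 * below covers those peers, and dividing by tm_rate_min (in
+	 * bits/sec) with the 1E5 scale factor yields time in 10 us units.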
+ */ + timeout = timeout * nix->nb_tx_queues; + timeout = timeout / nix->tm_rate_min; + if (!timeout) + timeout = 10000; + cycles = (timeout * 10 * plt_tsc_hz()) / (uint64_t)1E6; + cycles += plt_tsc_cycles(); + + sqbs = calloc(1, count * sizeof(uint64_t *)); + if (!sqbs) + return -ENOMEM; + + i = 0; + while (i < count && plt_tsc_cycles() < cycles) { + sqbs[i] = roc_npa_aura_op_alloc(sq->aura_handle, 0); + if (sqbs[i]) + i++; + else + plt_delay_us(1); + } + + if (i != count) { + plt_warn("SQ %u busy, unable to recover %u SQB's(%u desc)", sq->qid, count, + count * sq->sqes_per_sqb); + + /* Restore the SQB aura state and return */ + i--; + for (; i >= 0; i--) + roc_npa_aura_op_free(sq->aura_handle, 0, sqbs[i]); + free(sqbs); + return -EAGAIN; + } + + /* Extracted necessary SQB's, on free them */ + for (i = 0; i < count; i++) + plt_free((void *)sqbs[i]); + free(sqbs); + + /* Adjust SQ info */ + sq->nb_sqb_bufs -= count; + sq->nb_sqb_bufs_adj -= count; + sq->aura_sqb_bufs -= count; + return 0; +} + +int +roc_nix_sq_resize(struct roc_nix_sq *sq, uint32_t nb_desc) +{ + struct roc_nix *roc_nix = sq->roc_nix; + struct nix *nix = roc_nix_to_nix_priv(roc_nix); + uint16_t aura_sqb_bufs, nb_sqb_bufs, sqes_per_sqb; + int64_t *regaddr; + uint64_t wdata; + uint16_t diff; + int rc; + + if (!roc_nix->sq_resize_ena) + return -ENOTSUP; + + sqes_per_sqb = sq->sqes_per_sqb; + + /* Calculate new nb_sqb_bufs */ + nb_sqb_bufs = sq_desc_to_sqb(nix, sqes_per_sqb, nb_desc); + aura_sqb_bufs = sqb_slack_adjust(nix, nb_sqb_bufs, !!sq->sq_cnt_ptr); + + if (aura_sqb_bufs == sq->aura_sqb_bufs) + return 0; + + /* Issue atomic op to make sure all inflight LMTST's are complete + * assuming no new submissions will take place. + */ + wdata = ((uint64_t)sq->qid) << 32; + regaddr = (int64_t *)(nix->base + NIX_LF_SQ_OP_STATUS); + roc_atomic64_add_nosync(wdata, regaddr); + + /* Expand or Contract SQB aura */ + if (aura_sqb_bufs > sq->aura_sqb_bufs) { + /* Increase the limit */ + roc_npa_aura_limit_modify(sq->aura_handle, aura_sqb_bufs); + diff = aura_sqb_bufs - sq->aura_sqb_bufs; + roc_npa_aura_op_cnt_set(sq->aura_handle, 1, diff); + + rc = sqb_aura_dyn_expand(sq, diff); + } else { + diff = sq->aura_sqb_bufs - aura_sqb_bufs; + rc = sqb_aura_dyn_contract(sq, diff); + + /* Decrease the limit */ + if (!rc) { + roc_npa_aura_limit_modify(sq->aura_handle, aura_sqb_bufs); + roc_npa_aura_op_cnt_set(sq->aura_handle, 1, -(int64_t)diff); + } + } + + plt_io_wmb(); + if (!rc) { + sq->nb_desc = nb_desc; + if (sq->sq_cnt_ptr) + plt_atomic_store_explicit((uint64_t __plt_atomic *)sq->sq_cnt_ptr, nb_desc, + plt_memory_order_release); + *(uint64_t *)sq->fc = roc_npa_aura_op_cnt_get(sq->aura_handle); + } else { + roc_npa_aura_limit_modify(sq->aura_handle, sq->aura_sqb_bufs); + } + + plt_io_wmb(); + return rc; +} + void roc_nix_cq_head_tail_get(struct roc_nix *roc_nix, uint16_t qid, uint32_t *head, uint32_t *tail) diff --git a/drivers/common/cnxk/roc_platform_base_symbols.c b/drivers/common/cnxk/roc_platform_base_symbols.c index fee4b4d6889..43095741f52 100644 --- a/drivers/common/cnxk/roc_platform_base_symbols.c +++ b/drivers/common/cnxk/roc_platform_base_symbols.c @@ -352,6 +352,7 @@ RTE_EXPORT_INTERNAL_SYMBOL(roc_nix_rq_fini) RTE_EXPORT_INTERNAL_SYMBOL(roc_nix_cq_init) RTE_EXPORT_INTERNAL_SYMBOL(roc_nix_cq_fini) RTE_EXPORT_INTERNAL_SYMBOL(roc_nix_sq_init) +RTE_EXPORT_INTERNAL_SYMBOL(roc_nix_sq_resize) RTE_EXPORT_INTERNAL_SYMBOL(roc_nix_sq_fini) RTE_EXPORT_INTERNAL_SYMBOL(roc_nix_cq_head_tail_get) RTE_EXPORT_INTERNAL_SYMBOL(roc_nix_sq_head_tail_get) 
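
For illustration, a minimal usage sketch of the API added by the patch
above. It assumes a port probed with sq_resize_ena set before
roc_nix_sq_init(), an SQ whose submissions have been quiesced by the
caller, and the cnxk ROC build environment; the helper name
example_sq_resize is hypothetical and not part of the series.

```c
#include <errno.h>
#include <stdint.h>

#include "roc_api.h" /* cnxk ROC umbrella header, assumed available */

/* Resize an idle SQ. roc_nix_sq_resize() returns -ENOTSUP when the
 * port was not created with sq_resize_ena, and -EAGAIN when a shrink
 * cannot reclaim enough SQBs yet and should be retried later.
 */
static int
example_sq_resize(struct roc_nix_sq *sq, uint32_t new_nb_desc)
{
	int rc;

	/* The caller must stop new submissions first; the API itself
	 * only waits for LMTSTs that are already in flight.
	 */
	rc = roc_nix_sq_resize(sq, new_nb_desc);
	if (rc == -ENOTSUP)
		plt_err("port was not created with sq_resize_ena");
	else if (rc == -EAGAIN)
		plt_warn("SQ %u still draining, retry later", sq->qid);
	return rc;
}
```
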
From 9188d72b2143e7382aae12d22300f2542323389f Mon Sep 17 00:00:00 2001
From: Satha Rao
Date: Thu, 13 Nov 2025 10:07:59 +0530
Subject: [PATCH 37/99] common/cnxk: increase Tx scheduler count

The CN10K platform supports up to 2K Tx schedulers.

Signed-off-by: Satha Rao
---
 drivers/common/cnxk/roc_nix_priv.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/common/cnxk/roc_nix_priv.h b/drivers/common/cnxk/roc_nix_priv.h
index ae1a8fd7e09..308a62a70e6 100644
--- a/drivers/common/cnxk/roc_nix_priv.h
+++ b/drivers/common/cnxk/roc_nix_priv.h
@@ -41,7 +41,7 @@ struct nix_qint {
 };

 /* Traffic Manager */
-#define NIX_TM_MAX_HW_TXSCHQ 1024
+#define NIX_TM_MAX_HW_TXSCHQ 2048
 #define NIX_TM_HW_ID_INVALID UINT32_MAX
 #define NIX_TM_CHAN_INVALID UINT16_MAX

From 9d845a3d13bf70cf356c511f0d1bccfb69dc4f81 Mon Sep 17 00:00:00 2001
From: Rakesh Kudurumalla
Date: Thu, 13 Nov 2025 10:08:00 +0530
Subject: [PATCH 38/99] common/cnxk: fix null dereference

Fix a NULL pointer dereference reported by Klocwork.

Signed-off-by: Rakesh Kudurumalla
---
 drivers/common/cnxk/roc_nix_queue.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/common/cnxk/roc_nix_queue.c b/drivers/common/cnxk/roc_nix_queue.c
index 2c7fce10671..c7e37abf32e 100644
--- a/drivers/common/cnxk/roc_nix_queue.c
+++ b/drivers/common/cnxk/roc_nix_queue.c
@@ -112,6 +112,11 @@ roc_nix_sq_ena_dis(struct roc_nix_sq *sq, bool enable)
 {
 	int rc = 0;

+	if (!sq) {
+		rc = NIX_ERR_PARAM;
+		goto done;
+	}
+
 	rc = roc_nix_tm_sq_aura_fc(sq, enable);
 	if (rc)
 		goto done;
@@ -2163,9 +2168,9 @@ nix_sqb_mem_dyn_free(uint64_t aura_handle, uint16_t count)
 int
 roc_nix_sq_fini(struct roc_nix_sq *sq)
 {
-	struct roc_nix *roc_nix = sq->roc_nix;
-	bool sq_resize_ena = roc_nix->sq_resize_ena;
 	struct ndc_sync_op *ndc_req;
+	struct roc_nix *roc_nix;
+	bool sq_resize_ena;
 	struct mbox *mbox;
 	struct nix *nix;
 	uint16_t qid;
@@ -2174,6 +2179,9 @@ roc_nix_sq_fini(struct roc_nix_sq *sq)
 	if (sq == NULL)
 		return NIX_ERR_PARAM;

+	roc_nix = sq->roc_nix;
+	sq_resize_ena = roc_nix->sq_resize_ena;
+
 	nix = roc_nix_to_nix_priv(roc_nix);
 	mbox = (&nix->dev)->mbox;

@@ -2228,7 +2236,7 @@ sqb_aura_dyn_expand(struct roc_nix_sq *sq, uint16_t count)
 	int i;

 	blk_sz = nix->sqb_size;
-	sqbs = calloc(1, count * sizeof(uint64_t *));
+	sqbs = calloc(1, count * sizeof(uint64_t));
 	if (!sqbs)
 		return -ENOMEM;

@@ -2300,7 +2308,7 @@ sqb_aura_dyn_contract(struct roc_nix_sq *sq, uint16_t count)
 	cycles = (timeout * 10 * plt_tsc_hz()) / (uint64_t)1E6;
 	cycles += plt_tsc_cycles();

-	sqbs = calloc(1, count * sizeof(uint64_t *));
+	sqbs = calloc(1, count * sizeof(uint64_t));
 	if (!sqbs)
 		return -ENOMEM;

From 52ff61c2ca114691d74546f5ce16e8a7ebd164ae Mon Sep 17 00:00:00 2001
From: Satha Rao
Date: Thu, 13 Nov 2025 10:08:01 +0530
Subject: [PATCH 39/99] common/cnxk: fix null SQ access

Add a condition to check that the SQ is non-NULL before access.
Also, pktio locks are simplified while doing threshold_profile config.
Fixes: 90a903ffa6eb ("common/cnxk: split NIX TM hierarchy enable API") Cc: stable@dpdk.org Signed-off-by: Satha Rao --- drivers/common/cnxk/roc_nix_tm_ops.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/common/cnxk/roc_nix_tm_ops.c b/drivers/common/cnxk/roc_nix_tm_ops.c index 09d014a276c..230e9b72f64 100644 --- a/drivers/common/cnxk/roc_nix_tm_ops.c +++ b/drivers/common/cnxk/roc_nix_tm_ops.c @@ -624,6 +624,13 @@ roc_nix_tm_hierarchy_xmit_enable(struct roc_nix *roc_nix, enum roc_nix_tm_tree t sq_id = node->id; sq = nix->sqs[sq_id]; + if (!sq) { + plt_err("nb_rxq %d nb_txq %d sq_id %d lvl %d", nix->nb_rx_queues, + nix->nb_tx_queues, sq_id, node->lvl); + roc_nix_tm_dump(roc_nix, NULL); + roc_nix_dump(roc_nix, NULL); + return NIX_ERR_TM_INVALID_NODE; + } rc = roc_nix_sq_ena_dis(sq, true); if (rc) { plt_err("TM sw xon failed on SQ %u, rc=%d", node->id, From a8b2e7b664b73f8302c94a3665c624d8b879e1a6 Mon Sep 17 00:00:00 2001 From: Rahul Bhansali Date: Thu, 13 Nov 2025 10:08:02 +0530 Subject: [PATCH 40/99] common/cnxk: change aura field width Aura field width has changed from 20 bits to 17 bits for cn20k. Signed-off-by: Rahul Bhansali --- drivers/common/cnxk/roc_npa_type.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/common/cnxk/roc_npa_type.c b/drivers/common/cnxk/roc_npa_type.c index ed901389440..4c794972c0f 100644 --- a/drivers/common/cnxk/roc_npa_type.c +++ b/drivers/common/cnxk/roc_npa_type.c @@ -60,7 +60,7 @@ roc_npa_buf_type_mask(uint64_t aura_handle) uint64_t roc_npa_buf_type_limit_get(uint64_t type_mask) { - uint64_t wdata, reg; + uint64_t wdata, reg, shift; uint64_t limit = 0; struct npa_lf *lf; uint64_t aura_id; @@ -72,6 +72,7 @@ roc_npa_buf_type_limit_get(uint64_t type_mask) if (lf == NULL) return NPA_ERR_PARAM; + shift = roc_model_is_cn20k() ? 47 : 44; for (aura_id = 0; aura_id < lf->nr_pools; aura_id++) { if (plt_bitmap_get(lf->npa_bmp, aura_id)) continue; @@ -87,7 +88,7 @@ roc_npa_buf_type_limit_get(uint64_t type_mask) continue; } - wdata = aura_id << 44; + wdata = aura_id << shift; addr = (int64_t *)(lf->base + NPA_LF_AURA_OP_LIMIT); reg = roc_atomic64_add_nosync(wdata, addr); From 5a06d69a91f0cb06f98265b488ce482042ab7593 Mon Sep 17 00:00:00 2001 From: Nithin Dabilpuram Date: Thu, 13 Nov 2025 10:08:03 +0530 Subject: [PATCH 41/99] common/cnxk: fix inline inbound setup error handling Fix issue reported by klocwork. Fixes: f410059baac6 ("common/cnxk: support inline inbound queue") Cc: stable@dpdk.org Signed-off-by: Nithin Dabilpuram --- drivers/common/cnxk/roc_nix_inl_dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/common/cnxk/roc_nix_inl_dev.c b/drivers/common/cnxk/roc_nix_inl_dev.c index bc3aa60ab91..1eb96f913a4 100644 --- a/drivers/common/cnxk/roc_nix_inl_dev.c +++ b/drivers/common/cnxk/roc_nix_inl_dev.c @@ -247,7 +247,7 @@ nix_inl_inb_queue_setup(struct nix_inl_dev *inl_dev, uint8_t slot_id) if (!cpt_req) { rc |= -ENOSPC; } else { - nix_req->enable = false; + cpt_req->enable = 0; rc |= mbox_process(mbox); } cpt_cfg_fail: From 8da4eaf43701b8b249435bd452867f1f2c97ca2e Mon Sep 17 00:00:00 2001 From: Aarnav JP Date: Thu, 13 Nov 2025 10:08:04 +0530 Subject: [PATCH 42/99] common/cnxk: fix format specifier for bandwidth profile ID Fix format specifier for bandwidth profile ID. 
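
For illustration, a stand-alone sketch of the hazard (the field widths
below are hypothetical): the recombined profile ID is wider than int,
so passing it to a %d conversion is undefined, while casting to
uint64_t and printing with PRIx64 is well defined everywhere:

```c
#include <inttypes.h>
#include <stdio.h>

int main(void)
{
	/* Hypothetical split ID, mirroring band_prof_id_h/band_prof_id_l
	 * in the cn20k RQ context: a high part above a 10-bit low part.
	 */
	uint64_t id_h = 0xf, id_l = 0x3ff;

	/* Explicit promotion plus PRIx64 matches the printf argument. */
	printf("band_prof_id 0x%" PRIx64 "\n",
	       (uint64_t)((id_h << 10) | id_l));
	return 0;
}
```
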
Fixes: db5744d3cd23 ("common/cnxk: support NIX debug for CN20K") Cc: stable@dpdk.org Signed-off-by: Aarnav JP --- .mailmap | 1 + drivers/common/cnxk/roc_nix_debug.c | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.mailmap b/.mailmap index 2678c4a9ded..f9c5622e5dc 100644 --- a/.mailmap +++ b/.mailmap @@ -1,4 +1,5 @@ Aakash Sasidharan +Aarnav JP Aaro Koskinen Aaron Campbell Aaron Conole diff --git a/drivers/common/cnxk/roc_nix_debug.c b/drivers/common/cnxk/roc_nix_debug.c index f9294e693b8..11994bf131a 100644 --- a/drivers/common/cnxk/roc_nix_debug.c +++ b/drivers/common/cnxk/roc_nix_debug.c @@ -769,8 +769,8 @@ nix_lf_rq_dump(__io struct nix_cn20k_rq_ctx_s *ctx, FILE *file) nix_dump(file, "W2: xqe_hdr_split \t\t%d\nW2: xqe_imm_copy \t\t%d", ctx->xqe_hdr_split, ctx->xqe_imm_copy); - nix_dump(file, "W2: band_prof_id\t\t%d\n", - ((ctx->band_prof_id_h << 10) | ctx->band_prof_id_l)); + nix_dump(file, "W2: band_prof_id\t\t0x%" PRIx64 "\n", + (uint64_t)((ctx->band_prof_id_h << 10) | ctx->band_prof_id_l)); nix_dump(file, "W2: xqe_imm_size \t\t%d\nW2: later_skip \t\t\t%d", ctx->xqe_imm_size, ctx->later_skip); nix_dump(file, "W2: sso_bp_ena\t\t%d\n", ctx->sso_bp_ena); From b581982e3970c1c7c8a4f91a8c5bb040c9a733f6 Mon Sep 17 00:00:00 2001 From: Aarnav JP Date: Thu, 13 Nov 2025 10:08:05 +0530 Subject: [PATCH 43/99] common/cnxk: fix CPT result address config for inline Fix CPT res address config logic to avoid garbage values and trigger only when inline dev is present. Fixes: 3c31a7485172 ("common/cnxk: config CPT result address for CN20K") Cc: stable@dpdk.org Signed-off-by: Aarnav JP --- drivers/common/cnxk/roc_nix_inl.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/common/cnxk/roc_nix_inl.c b/drivers/common/cnxk/roc_nix_inl.c index 6700f556a0e..780f4cbbfc9 100644 --- a/drivers/common/cnxk/roc_nix_inl.c +++ b/drivers/common/cnxk/roc_nix_inl.c @@ -581,7 +581,7 @@ nix_inl_reass_inb_sa_tbl_setup(struct roc_nix *roc_nix) struct nix_inl_dev *inl_dev = NULL; uint64_t max_sa = 1, sa_pow2_sz; uint64_t sa_idx_w, lenm1_max; - uint64_t res_addr_offset; + uint64_t res_addr_offset = 0; uint64_t def_cptq = 0; size_t inb_sa_sz = 1; uint8_t profile_id; @@ -626,12 +626,11 @@ nix_inl_reass_inb_sa_tbl_setup(struct roc_nix *roc_nix) inl_dev = idev->nix_inl_dev; if (inl_dev->nb_inb_cptlfs) def_cptq = inl_dev->nix_inb_qids[inl_dev->inb_cpt_lf_id]; + res_addr_offset = (uint64_t)(inl_dev->res_addr_offset & 0xFF) << 48; + if (res_addr_offset) + res_addr_offset |= (1UL << 56); } - res_addr_offset = (uint64_t)(inl_dev->res_addr_offset & 0xFF) << 48; - if (res_addr_offset) - res_addr_offset |= (1UL << 56); - lf_cfg->enable = 1; lf_cfg->profile_id = profile_id; lf_cfg->rx_inline_sa_base = (uintptr_t)nix->inb_sa_base[profile_id]; From 9396a93a0d848edf0f32a4950418a8083e04924c Mon Sep 17 00:00:00 2001 From: Aarnav JP Date: Thu, 13 Nov 2025 10:08:06 +0530 Subject: [PATCH 44/99] common/cnxk: fix NIX Rx inject enabling Rx inject can only be supported with roc_nix. 
Fixes: 4b8eb5bd6627 ("common/cnxk: reserve CPT LF for Rx inject") Cc: stable@dpdk.org Signed-off-by: Aarnav JP --- drivers/common/cnxk/roc_nix_inl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/common/cnxk/roc_nix_inl.c b/drivers/common/cnxk/roc_nix_inl.c index 780f4cbbfc9..f8be98efd5b 100644 --- a/drivers/common/cnxk/roc_nix_inl.c +++ b/drivers/common/cnxk/roc_nix_inl.c @@ -849,12 +849,12 @@ roc_nix_inl_inb_rx_inject_enable(struct roc_nix *roc_nix, bool inb_inl_dev) if (inb_inl_dev) { inl_dev = idev->nix_inl_dev; - if (inl_dev && inl_dev->attach_cptlf && inl_dev->rx_inj_ena && + if (inl_dev && inl_dev->attach_cptlf && inl_dev->rx_inj_ena && roc_nix && roc_nix->rx_inj_ena) return true; } - return roc_nix->rx_inj_ena; + return roc_nix ? roc_nix->rx_inj_ena : 0; } uint32_t From 7d2c9dae103299edffbb436793ef499a6fdd9beb Mon Sep 17 00:00:00 2001 From: Aarnav JP Date: Thu, 13 Nov 2025 10:08:07 +0530 Subject: [PATCH 45/99] net/cnxk: fix Rx inject LF Fix illegal access on error. Fixes: 47cca253d605 ("net/cnxk: support Rx inject") Cc: stable@dpdk.org Signed-off-by: Aarnav JP --- drivers/net/cnxk/cn10k_ethdev_sec.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/cnxk/cn10k_ethdev_sec.c b/drivers/net/cnxk/cn10k_ethdev_sec.c index f26830637f8..855bea17961 100644 --- a/drivers/net/cnxk/cn10k_ethdev_sec.c +++ b/drivers/net/cnxk/cn10k_ethdev_sec.c @@ -1336,6 +1336,8 @@ cn10k_eth_sec_rx_inject_config(void *device, uint16_t port_id, bool enable) roc_idev_nix_rx_inject_set(port_id, enable); inl_lf = roc_nix_inl_inb_inj_lf_get(nix); + if (!inl_lf) + return -ENOTSUP; sa_base = roc_nix_inl_inb_sa_base_get(nix, dev->inb.inl_dev); inj_cfg = &dev->inj_cfg; From b595d05bf739b5330cbfca00bdb984edea579a57 Mon Sep 17 00:00:00 2001 From: Aarnav JP Date: Thu, 13 Nov 2025 10:08:08 +0530 Subject: [PATCH 46/99] net/cnxk: fix default meter pre-color While converting to HW specific icolor, check for invalid default pre color. Fixes: ac35d4bf4cd6 ("net/cnxk: support ingress meter pre-color") Cc: stable@dpdk.org Signed-off-by: Aarnav JP --- drivers/net/cnxk/cnxk_ethdev_mtr.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/cnxk/cnxk_ethdev_mtr.c b/drivers/net/cnxk/cnxk_ethdev_mtr.c index edeca6dcc38..992e2d446ee 100644 --- a/drivers/net/cnxk/cnxk_ethdev_mtr.c +++ b/drivers/net/cnxk/cnxk_ethdev_mtr.c @@ -1261,7 +1261,13 @@ nix_mtr_config_map(struct cnxk_meter_node *mtr, struct roc_nix_bpf_cfg *cfg) cfg->alg = alg_map[profile->profile.alg]; cfg->lmode = profile->profile.packet_mode; - cfg->icolor = color_map[mtr->params.default_input_color]; + int idx = mtr->params.default_input_color; + + /* Index validation */ + if (idx >= RTE_COLORS) + cfg->icolor = ROC_NIX_BPF_COLOR_GREEN; + else + cfg->icolor = color_map[idx]; switch (RTE_MTR_COLOR_IN_PROTO_OUTER_IP) { case RTE_MTR_COLOR_IN_PROTO_OUTER_IP: From a273a0e6dc8c343f12d386bb123201d6e39a39f0 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 14 Nov 2025 10:02:51 -0800 Subject: [PATCH 47/99] net/qede/base: remove unused file This file in base directory is not built in DPDK. No point in storing it in repository. 
Signed-off-by: Stephen Hemminger --- drivers/net/qede/base/ecore_mng_tlv.c | 1540 ------------------------- 1 file changed, 1540 deletions(-) delete mode 100644 drivers/net/qede/base/ecore_mng_tlv.c diff --git a/drivers/net/qede/base/ecore_mng_tlv.c b/drivers/net/qede/base/ecore_mng_tlv.c deleted file mode 100644 index f7666472d6e..00000000000 --- a/drivers/net/qede/base/ecore_mng_tlv.c +++ /dev/null @@ -1,1540 +0,0 @@ -/* SPDX-License-Identifier: BSD-3-Clause - * Copyright (c) 2016 - 2018 Cavium Inc. - * All rights reserved. - * www.cavium.com - */ - -#include "bcm_osal.h" -#include "ecore.h" -#include "ecore_status.h" -#include "ecore_mcp.h" -#include "ecore_hw.h" -#include "reg_addr.h" - -#define TLV_TYPE(p) (p[0]) -#define TLV_LENGTH(p) (p[1]) -#define TLV_FLAGS(p) (p[3]) - -static enum _ecore_status_t -ecore_mfw_get_tlv_group(u8 tlv_type, u8 *tlv_group) -{ - switch (tlv_type) { - case DRV_TLV_FEATURE_FLAGS: - case DRV_TLV_LOCAL_ADMIN_ADDR: - case DRV_TLV_ADDITIONAL_MAC_ADDR_1: - case DRV_TLV_ADDITIONAL_MAC_ADDR_2: - case DRV_TLV_OS_DRIVER_STATES: - case DRV_TLV_PXE_BOOT_PROGRESS: - case DRV_TLV_RX_FRAMES_RECEIVED: - case DRV_TLV_RX_BYTES_RECEIVED: - case DRV_TLV_TX_FRAMES_SENT: - case DRV_TLV_TX_BYTES_SENT: - *tlv_group |= ECORE_MFW_TLV_GENERIC; - break; - case DRV_TLV_LSO_MAX_OFFLOAD_SIZE: - case DRV_TLV_LSO_MIN_SEGMENT_COUNT: - case DRV_TLV_PROMISCUOUS_MODE: - case DRV_TLV_TX_DESCRIPTORS_QUEUE_SIZE: - case DRV_TLV_RX_DESCRIPTORS_QUEUE_SIZE: - case DRV_TLV_NUM_OF_NET_QUEUE_VMQ_CFG: - case DRV_TLV_NUM_OFFLOADED_CONNECTIONS_TCP_IPV4: - case DRV_TLV_NUM_OFFLOADED_CONNECTIONS_TCP_IPV6: - case DRV_TLV_TX_DESCRIPTOR_QUEUE_AVG_DEPTH: - case DRV_TLV_RX_DESCRIPTORS_QUEUE_AVG_DEPTH: - case DRV_TLV_IOV_OFFLOAD: - case DRV_TLV_TX_QUEUES_EMPTY: - case DRV_TLV_RX_QUEUES_EMPTY: - case DRV_TLV_TX_QUEUES_FULL: - case DRV_TLV_RX_QUEUES_FULL: - *tlv_group |= ECORE_MFW_TLV_ETH; - break; - case DRV_TLV_SCSI_TO: - case DRV_TLV_R_T_TOV: - case DRV_TLV_R_A_TOV: - case DRV_TLV_E_D_TOV: - case DRV_TLV_CR_TOV: - case DRV_TLV_BOOT_TYPE: - case DRV_TLV_NPIV_STATE: - case DRV_TLV_NUM_OF_NPIV_IDS: - case DRV_TLV_SWITCH_NAME: - case DRV_TLV_SWITCH_PORT_NUM: - case DRV_TLV_SWITCH_PORT_ID: - case DRV_TLV_VENDOR_NAME: - case DRV_TLV_SWITCH_MODEL: - case DRV_TLV_SWITCH_FW_VER: - case DRV_TLV_QOS_PRIORITY_PER_802_1P: - case DRV_TLV_PORT_ALIAS: - case DRV_TLV_PORT_STATE: - case DRV_TLV_FIP_TX_DESCRIPTORS_QUEUE_SIZE: - case DRV_TLV_FCOE_RX_DESCRIPTORS_QUEUE_SIZE: - case DRV_TLV_LINK_FAILURE_COUNT: - case DRV_TLV_FCOE_BOOT_PROGRESS: - case DRV_TLV_RX_BROADCAST_PACKETS: - case DRV_TLV_TX_BROADCAST_PACKETS: - case DRV_TLV_FCOE_TX_DESCRIPTOR_QUEUE_AVG_DEPTH: - case DRV_TLV_FCOE_RX_DESCRIPTORS_QUEUE_AVG_DEPTH: - case DRV_TLV_FCOE_RX_FRAMES_RECEIVED: - case DRV_TLV_FCOE_RX_BYTES_RECEIVED: - case DRV_TLV_FCOE_TX_FRAMES_SENT: - case DRV_TLV_FCOE_TX_BYTES_SENT: - case DRV_TLV_CRC_ERROR_COUNT: - case DRV_TLV_CRC_ERROR_1_RECEIVED_SOURCE_FC_ID: - case DRV_TLV_CRC_ERROR_1_TIMESTAMP: - case DRV_TLV_CRC_ERROR_2_RECEIVED_SOURCE_FC_ID: - case DRV_TLV_CRC_ERROR_2_TIMESTAMP: - case DRV_TLV_CRC_ERROR_3_RECEIVED_SOURCE_FC_ID: - case DRV_TLV_CRC_ERROR_3_TIMESTAMP: - case DRV_TLV_CRC_ERROR_4_RECEIVED_SOURCE_FC_ID: - case DRV_TLV_CRC_ERROR_4_TIMESTAMP: - case DRV_TLV_CRC_ERROR_5_RECEIVED_SOURCE_FC_ID: - case DRV_TLV_CRC_ERROR_5_TIMESTAMP: - case DRV_TLV_LOSS_OF_SYNC_ERROR_COUNT: - case DRV_TLV_LOSS_OF_SIGNAL_ERRORS: - case DRV_TLV_PRIMITIVE_SEQUENCE_PROTOCOL_ERROR_COUNT: - case DRV_TLV_DISPARITY_ERROR_COUNT: - case 
DRV_TLV_CODE_VIOLATION_ERROR_COUNT: - case DRV_TLV_LAST_FLOGI_ISSUED_COMMON_PARAMETERS_WORD_1: - case DRV_TLV_LAST_FLOGI_ISSUED_COMMON_PARAMETERS_WORD_2: - case DRV_TLV_LAST_FLOGI_ISSUED_COMMON_PARAMETERS_WORD_3: - case DRV_TLV_LAST_FLOGI_ISSUED_COMMON_PARAMETERS_WORD_4: - case DRV_TLV_LAST_FLOGI_TIMESTAMP: - case DRV_TLV_LAST_FLOGI_ACC_COMMON_PARAMETERS_WORD_1: - case DRV_TLV_LAST_FLOGI_ACC_COMMON_PARAMETERS_WORD_2: - case DRV_TLV_LAST_FLOGI_ACC_COMMON_PARAMETERS_WORD_3: - case DRV_TLV_LAST_FLOGI_ACC_COMMON_PARAMETERS_WORD_4: - case DRV_TLV_LAST_FLOGI_ACC_TIMESTAMP: - case DRV_TLV_LAST_FLOGI_RJT: - case DRV_TLV_LAST_FLOGI_RJT_TIMESTAMP: - case DRV_TLV_FDISCS_SENT_COUNT: - case DRV_TLV_FDISC_ACCS_RECEIVED: - case DRV_TLV_FDISC_RJTS_RECEIVED: - case DRV_TLV_PLOGI_SENT_COUNT: - case DRV_TLV_PLOGI_ACCS_RECEIVED: - case DRV_TLV_PLOGI_RJTS_RECEIVED: - case DRV_TLV_PLOGI_1_SENT_DESTINATION_FC_ID: - case DRV_TLV_PLOGI_1_TIMESTAMP: - case DRV_TLV_PLOGI_2_SENT_DESTINATION_FC_ID: - case DRV_TLV_PLOGI_2_TIMESTAMP: - case DRV_TLV_PLOGI_3_SENT_DESTINATION_FC_ID: - case DRV_TLV_PLOGI_3_TIMESTAMP: - case DRV_TLV_PLOGI_4_SENT_DESTINATION_FC_ID: - case DRV_TLV_PLOGI_4_TIMESTAMP: - case DRV_TLV_PLOGI_5_SENT_DESTINATION_FC_ID: - case DRV_TLV_PLOGI_5_TIMESTAMP: - case DRV_TLV_PLOGI_1_ACC_RECEIVED_SOURCE_FC_ID: - case DRV_TLV_PLOGI_1_ACC_TIMESTAMP: - case DRV_TLV_PLOGI_2_ACC_RECEIVED_SOURCE_FC_ID: - case DRV_TLV_PLOGI_2_ACC_TIMESTAMP: - case DRV_TLV_PLOGI_3_ACC_RECEIVED_SOURCE_FC_ID: - case DRV_TLV_PLOGI_3_ACC_TIMESTAMP: - case DRV_TLV_PLOGI_4_ACC_RECEIVED_SOURCE_FC_ID: - case DRV_TLV_PLOGI_4_ACC_TIMESTAMP: - case DRV_TLV_PLOGI_5_ACC_RECEIVED_SOURCE_FC_ID: - case DRV_TLV_PLOGI_5_ACC_TIMESTAMP: - case DRV_TLV_LOGOS_ISSUED: - case DRV_TLV_LOGO_ACCS_RECEIVED: - case DRV_TLV_LOGO_RJTS_RECEIVED: - case DRV_TLV_LOGO_1_RECEIVED_SOURCE_FC_ID: - case DRV_TLV_LOGO_1_TIMESTAMP: - case DRV_TLV_LOGO_2_RECEIVED_SOURCE_FC_ID: - case DRV_TLV_LOGO_2_TIMESTAMP: - case DRV_TLV_LOGO_3_RECEIVED_SOURCE_FC_ID: - case DRV_TLV_LOGO_3_TIMESTAMP: - case DRV_TLV_LOGO_4_RECEIVED_SOURCE_FC_ID: - case DRV_TLV_LOGO_4_TIMESTAMP: - case DRV_TLV_LOGO_5_RECEIVED_SOURCE_FC_ID: - case DRV_TLV_LOGO_5_TIMESTAMP: - case DRV_TLV_LOGOS_RECEIVED: - case DRV_TLV_ACCS_ISSUED: - case DRV_TLV_PRLIS_ISSUED: - case DRV_TLV_ACCS_RECEIVED: - case DRV_TLV_ABTS_SENT_COUNT: - case DRV_TLV_ABTS_ACCS_RECEIVED: - case DRV_TLV_ABTS_RJTS_RECEIVED: - case DRV_TLV_ABTS_1_SENT_DESTINATION_FC_ID: - case DRV_TLV_ABTS_1_TIMESTAMP: - case DRV_TLV_ABTS_2_SENT_DESTINATION_FC_ID: - case DRV_TLV_ABTS_2_TIMESTAMP: - case DRV_TLV_ABTS_3_SENT_DESTINATION_FC_ID: - case DRV_TLV_ABTS_3_TIMESTAMP: - case DRV_TLV_ABTS_4_SENT_DESTINATION_FC_ID: - case DRV_TLV_ABTS_4_TIMESTAMP: - case DRV_TLV_ABTS_5_SENT_DESTINATION_FC_ID: - case DRV_TLV_ABTS_5_TIMESTAMP: - case DRV_TLV_RSCNS_RECEIVED: - case DRV_TLV_LAST_RSCN_RECEIVED_N_PORT_1: - case DRV_TLV_LAST_RSCN_RECEIVED_N_PORT_2: - case DRV_TLV_LAST_RSCN_RECEIVED_N_PORT_3: - case DRV_TLV_LAST_RSCN_RECEIVED_N_PORT_4: - case DRV_TLV_LUN_RESETS_ISSUED: - case DRV_TLV_ABORT_TASK_SETS_ISSUED: - case DRV_TLV_TPRLOS_SENT: - case DRV_TLV_NOS_SENT_COUNT: - case DRV_TLV_NOS_RECEIVED_COUNT: - case DRV_TLV_OLS_COUNT: - case DRV_TLV_LR_COUNT: - case DRV_TLV_LRR_COUNT: - case DRV_TLV_LIP_SENT_COUNT: - case DRV_TLV_LIP_RECEIVED_COUNT: - case DRV_TLV_EOFA_COUNT: - case DRV_TLV_EOFNI_COUNT: - case DRV_TLV_SCSI_STATUS_CHECK_CONDITION_COUNT: - case DRV_TLV_SCSI_STATUS_CONDITION_MET_COUNT: - case DRV_TLV_SCSI_STATUS_BUSY_COUNT: - case 
DRV_TLV_SCSI_STATUS_INTERMEDIATE_COUNT: - case DRV_TLV_SCSI_STATUS_INTERMEDIATE_CONDITION_MET_COUNT: - case DRV_TLV_SCSI_STATUS_RESERVATION_CONFLICT_COUNT: - case DRV_TLV_SCSI_STATUS_TASK_SET_FULL_COUNT: - case DRV_TLV_SCSI_STATUS_ACA_ACTIVE_COUNT: - case DRV_TLV_SCSI_STATUS_TASK_ABORTED_COUNT: - case DRV_TLV_SCSI_CHECK_CONDITION_1_RECEIVED_SK_ASC_ASCQ: - case DRV_TLV_SCSI_CHECK_1_TIMESTAMP: - case DRV_TLV_SCSI_CHECK_CONDITION_2_RECEIVED_SK_ASC_ASCQ: - case DRV_TLV_SCSI_CHECK_2_TIMESTAMP: - case DRV_TLV_SCSI_CHECK_CONDITION_3_RECEIVED_SK_ASC_ASCQ: - case DRV_TLV_SCSI_CHECK_3_TIMESTAMP: - case DRV_TLV_SCSI_CHECK_CONDITION_4_RECEIVED_SK_ASC_ASCQ: - case DRV_TLV_SCSI_CHECK_4_TIMESTAMP: - case DRV_TLV_SCSI_CHECK_CONDITION_5_RECEIVED_SK_ASC_ASCQ: - case DRV_TLV_SCSI_CHECK_5_TIMESTAMP: - *tlv_group = ECORE_MFW_TLV_FCOE; - break; - case DRV_TLV_TARGET_LLMNR_ENABLED: - case DRV_TLV_HEADER_DIGEST_FLAG_ENABLED: - case DRV_TLV_DATA_DIGEST_FLAG_ENABLED: - case DRV_TLV_AUTHENTICATION_METHOD: - case DRV_TLV_ISCSI_BOOT_TARGET_PORTAL: - case DRV_TLV_MAX_FRAME_SIZE: - case DRV_TLV_PDU_TX_DESCRIPTORS_QUEUE_SIZE: - case DRV_TLV_PDU_RX_DESCRIPTORS_QUEUE_SIZE: - case DRV_TLV_ISCSI_BOOT_PROGRESS: - case DRV_TLV_PDU_TX_DESCRIPTOR_QUEUE_AVG_DEPTH: - case DRV_TLV_PDU_RX_DESCRIPTORS_QUEUE_AVG_DEPTH: - case DRV_TLV_ISCSI_PDU_RX_FRAMES_RECEIVED: - case DRV_TLV_ISCSI_PDU_RX_BYTES_RECEIVED: - case DRV_TLV_ISCSI_PDU_TX_FRAMES_SENT: - case DRV_TLV_ISCSI_PDU_TX_BYTES_SENT: - *tlv_group |= ECORE_MFW_TLV_ISCSI; - break; - default: - return ECORE_INVAL; - } - - return ECORE_SUCCESS; -} - -static int -ecore_mfw_get_gen_tlv_value(struct ecore_drv_tlv_hdr *p_tlv, - struct ecore_mfw_tlv_generic *p_drv_buf, - u8 **p_tlv_buf) -{ - switch (p_tlv->tlv_type) { - case DRV_TLV_FEATURE_FLAGS: - if (p_drv_buf->feat_flags_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->feat_flags; - return sizeof(p_drv_buf->feat_flags); - } - break; - case DRV_TLV_LOCAL_ADMIN_ADDR: - if (p_drv_buf->local_mac_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->local_mac; - return sizeof(p_drv_buf->local_mac); - } - break; - case DRV_TLV_ADDITIONAL_MAC_ADDR_1: - if (p_drv_buf->additional_mac1_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->additional_mac1; - return sizeof(p_drv_buf->additional_mac1); - } - break; - case DRV_TLV_ADDITIONAL_MAC_ADDR_2: - if (p_drv_buf->additional_mac2_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->additional_mac2; - return sizeof(p_drv_buf->additional_mac2); - } - break; - case DRV_TLV_OS_DRIVER_STATES: - if (p_drv_buf->drv_state_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->drv_state; - return sizeof(p_drv_buf->drv_state); - } - break; - case DRV_TLV_PXE_BOOT_PROGRESS: - if (p_drv_buf->pxe_progress_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->pxe_progress; - return sizeof(p_drv_buf->pxe_progress); - } - break; - case DRV_TLV_RX_FRAMES_RECEIVED: - if (p_drv_buf->rx_frames_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->rx_frames; - return sizeof(p_drv_buf->rx_frames); - } - break; - case DRV_TLV_RX_BYTES_RECEIVED: - if (p_drv_buf->rx_bytes_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->rx_bytes; - return sizeof(p_drv_buf->rx_bytes); - } - break; - case DRV_TLV_TX_FRAMES_SENT: - if (p_drv_buf->tx_frames_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->tx_frames; - return sizeof(p_drv_buf->tx_frames); - } - break; - case DRV_TLV_TX_BYTES_SENT: - if (p_drv_buf->tx_bytes_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->tx_bytes; - return sizeof(p_drv_buf->tx_bytes); - } - break; - default: - break; - } - - return -1; -} - -static int -ecore_mfw_get_eth_tlv_value(struct ecore_drv_tlv_hdr *p_tlv, - struct 
ecore_mfw_tlv_eth *p_drv_buf, - u8 **p_tlv_buf) -{ - switch (p_tlv->tlv_type) { - case DRV_TLV_LSO_MAX_OFFLOAD_SIZE: - if (p_drv_buf->lso_maxoff_size_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->lso_maxoff_size; - return sizeof(p_drv_buf->lso_maxoff_size); - } - break; - case DRV_TLV_LSO_MIN_SEGMENT_COUNT: - if (p_drv_buf->lso_minseg_size_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->lso_minseg_size; - return sizeof(p_drv_buf->lso_minseg_size); - } - break; - case DRV_TLV_PROMISCUOUS_MODE: - if (p_drv_buf->prom_mode_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->prom_mode; - return sizeof(p_drv_buf->prom_mode); - } - break; - case DRV_TLV_TX_DESCRIPTORS_QUEUE_SIZE: - if (p_drv_buf->tx_descr_size_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->tx_descr_size; - return sizeof(p_drv_buf->tx_descr_size); - } - break; - case DRV_TLV_RX_DESCRIPTORS_QUEUE_SIZE: - if (p_drv_buf->rx_descr_size_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->rx_descr_size; - return sizeof(p_drv_buf->rx_descr_size); - } - break; - case DRV_TLV_NUM_OF_NET_QUEUE_VMQ_CFG: - if (p_drv_buf->netq_count_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->netq_count; - return sizeof(p_drv_buf->netq_count); - } - break; - case DRV_TLV_NUM_OFFLOADED_CONNECTIONS_TCP_IPV4: - if (p_drv_buf->tcp4_offloads_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->tcp4_offloads; - return sizeof(p_drv_buf->tcp4_offloads); - } - break; - case DRV_TLV_NUM_OFFLOADED_CONNECTIONS_TCP_IPV6: - if (p_drv_buf->tcp6_offloads_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->tcp6_offloads; - return sizeof(p_drv_buf->tcp6_offloads); - } - break; - case DRV_TLV_TX_DESCRIPTOR_QUEUE_AVG_DEPTH: - if (p_drv_buf->tx_descr_qdepth_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->tx_descr_qdepth; - return sizeof(p_drv_buf->tx_descr_qdepth); - } - break; - case DRV_TLV_RX_DESCRIPTORS_QUEUE_AVG_DEPTH: - if (p_drv_buf->rx_descr_qdepth_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->rx_descr_qdepth; - return sizeof(p_drv_buf->rx_descr_qdepth); - } - break; - case DRV_TLV_IOV_OFFLOAD: - if (p_drv_buf->iov_offload_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->iov_offload; - return sizeof(p_drv_buf->iov_offload); - } - break; - case DRV_TLV_TX_QUEUES_EMPTY: - if (p_drv_buf->txqs_empty_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->txqs_empty; - return sizeof(p_drv_buf->txqs_empty); - } - break; - case DRV_TLV_RX_QUEUES_EMPTY: - if (p_drv_buf->rxqs_empty_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->rxqs_empty; - return sizeof(p_drv_buf->rxqs_empty); - } - break; - case DRV_TLV_TX_QUEUES_FULL: - if (p_drv_buf->num_txqs_full_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->num_txqs_full; - return sizeof(p_drv_buf->num_txqs_full); - } - break; - case DRV_TLV_RX_QUEUES_FULL: - if (p_drv_buf->num_rxqs_full_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->num_rxqs_full; - return sizeof(p_drv_buf->num_rxqs_full); - } - break; - default: - break; - } - - return -1; -} - -static int -ecore_mfw_get_fcoe_tlv_value(struct ecore_drv_tlv_hdr *p_tlv, - struct ecore_mfw_tlv_fcoe *p_drv_buf, - u8 **p_tlv_buf) -{ - switch (p_tlv->tlv_type) { - case DRV_TLV_SCSI_TO: - if (p_drv_buf->scsi_timeout_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->scsi_timeout; - return sizeof(p_drv_buf->scsi_timeout); - } - break; - case DRV_TLV_R_T_TOV: - if (p_drv_buf->rt_tov_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->rt_tov; - return sizeof(p_drv_buf->rt_tov); - } - break; - case DRV_TLV_R_A_TOV: - if (p_drv_buf->ra_tov_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->ra_tov; - return sizeof(p_drv_buf->ra_tov); - } - break; - case DRV_TLV_E_D_TOV: - if (p_drv_buf->ed_tov_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->ed_tov; - return 
sizeof(p_drv_buf->ed_tov); - } - break; - case DRV_TLV_CR_TOV: - if (p_drv_buf->cr_tov_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->cr_tov; - return sizeof(p_drv_buf->cr_tov); - } - break; - case DRV_TLV_BOOT_TYPE: - if (p_drv_buf->boot_type_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->boot_type; - return sizeof(p_drv_buf->boot_type); - } - break; - case DRV_TLV_NPIV_STATE: - if (p_drv_buf->npiv_state_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->npiv_state; - return sizeof(p_drv_buf->npiv_state); - } - break; - case DRV_TLV_NUM_OF_NPIV_IDS: - if (p_drv_buf->num_npiv_ids_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->num_npiv_ids; - return sizeof(p_drv_buf->num_npiv_ids); - } - break; - case DRV_TLV_SWITCH_NAME: - if (p_drv_buf->switch_name_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->switch_name; - return sizeof(p_drv_buf->switch_name); - } - break; - case DRV_TLV_SWITCH_PORT_NUM: - if (p_drv_buf->switch_portnum_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->switch_portnum; - return sizeof(p_drv_buf->switch_portnum); - } - break; - case DRV_TLV_SWITCH_PORT_ID: - if (p_drv_buf->switch_portid_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->switch_portid; - return sizeof(p_drv_buf->switch_portid); - } - break; - case DRV_TLV_VENDOR_NAME: - if (p_drv_buf->vendor_name_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->vendor_name; - return sizeof(p_drv_buf->vendor_name); - } - break; - case DRV_TLV_SWITCH_MODEL: - if (p_drv_buf->switch_model_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->switch_model; - return sizeof(p_drv_buf->switch_model); - } - break; - case DRV_TLV_SWITCH_FW_VER: - if (p_drv_buf->switch_fw_version_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->switch_fw_version; - return sizeof(p_drv_buf->switch_fw_version); - } - break; - case DRV_TLV_QOS_PRIORITY_PER_802_1P: - if (p_drv_buf->qos_pri_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->qos_pri; - return sizeof(p_drv_buf->qos_pri); - } - break; - case DRV_TLV_PORT_ALIAS: - if (p_drv_buf->port_alias_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->port_alias; - return sizeof(p_drv_buf->port_alias); - } - break; - case DRV_TLV_PORT_STATE: - if (p_drv_buf->port_state_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->port_state; - return sizeof(p_drv_buf->port_state); - } - break; - case DRV_TLV_FIP_TX_DESCRIPTORS_QUEUE_SIZE: - if (p_drv_buf->fip_tx_descr_size_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->fip_tx_descr_size; - return sizeof(p_drv_buf->fip_tx_descr_size); - } - break; - case DRV_TLV_FCOE_RX_DESCRIPTORS_QUEUE_SIZE: - if (p_drv_buf->fip_rx_descr_size_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->fip_rx_descr_size; - return sizeof(p_drv_buf->fip_rx_descr_size); - } - break; - case DRV_TLV_LINK_FAILURE_COUNT: - if (p_drv_buf->link_failures_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->link_failures; - return sizeof(p_drv_buf->link_failures); - } - break; - case DRV_TLV_FCOE_BOOT_PROGRESS: - if (p_drv_buf->fcoe_boot_progress_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->fcoe_boot_progress; - return sizeof(p_drv_buf->fcoe_boot_progress); - } - break; - case DRV_TLV_RX_BROADCAST_PACKETS: - if (p_drv_buf->rx_bcast_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->rx_bcast; - return sizeof(p_drv_buf->rx_bcast); - } - break; - case DRV_TLV_TX_BROADCAST_PACKETS: - if (p_drv_buf->tx_bcast_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->tx_bcast; - return sizeof(p_drv_buf->tx_bcast); - } - break; - case DRV_TLV_FCOE_TX_DESCRIPTOR_QUEUE_AVG_DEPTH: - if (p_drv_buf->fcoe_txq_depth_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->fcoe_txq_depth; - return sizeof(p_drv_buf->fcoe_txq_depth); - } - break; - case DRV_TLV_FCOE_RX_DESCRIPTORS_QUEUE_AVG_DEPTH: - if 
(p_drv_buf->fcoe_rxq_depth_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->fcoe_rxq_depth; - return sizeof(p_drv_buf->fcoe_rxq_depth); - } - break; - case DRV_TLV_FCOE_RX_FRAMES_RECEIVED: - if (p_drv_buf->fcoe_rx_frames_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->fcoe_rx_frames; - return sizeof(p_drv_buf->fcoe_rx_frames); - } - break; - case DRV_TLV_FCOE_RX_BYTES_RECEIVED: - if (p_drv_buf->fcoe_rx_bytes_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->fcoe_rx_bytes; - return sizeof(p_drv_buf->fcoe_rx_bytes); - } - break; - case DRV_TLV_FCOE_TX_FRAMES_SENT: - if (p_drv_buf->fcoe_tx_frames_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->fcoe_tx_frames; - return sizeof(p_drv_buf->fcoe_tx_frames); - } - break; - case DRV_TLV_FCOE_TX_BYTES_SENT: - if (p_drv_buf->fcoe_tx_bytes_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->fcoe_tx_bytes; - return sizeof(p_drv_buf->fcoe_tx_bytes); - } - break; - case DRV_TLV_CRC_ERROR_COUNT: - if (p_drv_buf->crc_count_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->crc_count; - return sizeof(p_drv_buf->crc_count); - } - break; - case DRV_TLV_CRC_ERROR_1_RECEIVED_SOURCE_FC_ID: - if (p_drv_buf->crc_err_src_fcid_set[0]) { - *p_tlv_buf = (u8 *)&p_drv_buf->crc_err_src_fcid[0]; - return sizeof(p_drv_buf->crc_err_src_fcid[0]); - } - break; - case DRV_TLV_CRC_ERROR_2_RECEIVED_SOURCE_FC_ID: - if (p_drv_buf->crc_err_src_fcid_set[1]) { - *p_tlv_buf = (u8 *)&p_drv_buf->crc_err_src_fcid[1]; - return sizeof(p_drv_buf->crc_err_src_fcid[1]); - } - break; - case DRV_TLV_CRC_ERROR_3_RECEIVED_SOURCE_FC_ID: - if (p_drv_buf->crc_err_src_fcid_set[2]) { - *p_tlv_buf = (u8 *)&p_drv_buf->crc_err_src_fcid[2]; - return sizeof(p_drv_buf->crc_err_src_fcid[2]); - } - break; - case DRV_TLV_CRC_ERROR_4_RECEIVED_SOURCE_FC_ID: - if (p_drv_buf->crc_err_src_fcid_set[3]) { - *p_tlv_buf = (u8 *)&p_drv_buf->crc_err_src_fcid[3]; - return sizeof(p_drv_buf->crc_err_src_fcid[3]); - } - break; - case DRV_TLV_CRC_ERROR_5_RECEIVED_SOURCE_FC_ID: - if (p_drv_buf->crc_err_src_fcid_set[4]) { - *p_tlv_buf = (u8 *)&p_drv_buf->crc_err_src_fcid[4]; - return sizeof(p_drv_buf->crc_err_src_fcid[4]); - } - break; - case DRV_TLV_CRC_ERROR_1_TIMESTAMP: - if (p_drv_buf->crc_err_tstamp_set[0]) { - *p_tlv_buf = (u8 *)&p_drv_buf->crc_err_tstamp[0]; - return sizeof(p_drv_buf->crc_err_tstamp[0]); - } - break; - case DRV_TLV_CRC_ERROR_2_TIMESTAMP: - if (p_drv_buf->crc_err_tstamp_set[1]) { - *p_tlv_buf = (u8 *)&p_drv_buf->crc_err_tstamp[1]; - return sizeof(p_drv_buf->crc_err_tstamp[1]); - } - break; - case DRV_TLV_CRC_ERROR_3_TIMESTAMP: - if (p_drv_buf->crc_err_tstamp_set[2]) { - *p_tlv_buf = (u8 *)&p_drv_buf->crc_err_tstamp[2]; - return sizeof(p_drv_buf->crc_err_tstamp[2]); - } - break; - case DRV_TLV_CRC_ERROR_4_TIMESTAMP: - if (p_drv_buf->crc_err_tstamp_set[3]) { - *p_tlv_buf = (u8 *)&p_drv_buf->crc_err_tstamp[3]; - return sizeof(p_drv_buf->crc_err_tstamp[3]); - } - break; - case DRV_TLV_CRC_ERROR_5_TIMESTAMP: - if (p_drv_buf->crc_err_tstamp_set[4]) { - *p_tlv_buf = (u8 *)&p_drv_buf->crc_err_tstamp[4]; - return sizeof(p_drv_buf->crc_err_tstamp[4]); - } - break; - case DRV_TLV_LOSS_OF_SYNC_ERROR_COUNT: - if (p_drv_buf->losync_err_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->losync_err; - return sizeof(p_drv_buf->losync_err); - } - break; - case DRV_TLV_LOSS_OF_SIGNAL_ERRORS: - if (p_drv_buf->losig_err_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->losig_err; - return sizeof(p_drv_buf->losig_err); - } - break; - case DRV_TLV_PRIMITIVE_SEQUENCE_PROTOCOL_ERROR_COUNT: - if (p_drv_buf->primtive_err_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->primtive_err; - return 
sizeof(p_drv_buf->primtive_err); - } - break; - case DRV_TLV_DISPARITY_ERROR_COUNT: - if (p_drv_buf->disparity_err_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->disparity_err; - return sizeof(p_drv_buf->disparity_err); - } - break; - case DRV_TLV_CODE_VIOLATION_ERROR_COUNT: - if (p_drv_buf->code_violation_err_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->code_violation_err; - return sizeof(p_drv_buf->code_violation_err); - } - break; - case DRV_TLV_LAST_FLOGI_ISSUED_COMMON_PARAMETERS_WORD_1: - if (p_drv_buf->flogi_param_set[0]) { - *p_tlv_buf = (u8 *)&p_drv_buf->flogi_param[0]; - return sizeof(p_drv_buf->flogi_param[0]); - } - break; - case DRV_TLV_LAST_FLOGI_ISSUED_COMMON_PARAMETERS_WORD_2: - if (p_drv_buf->flogi_param_set[1]) { - *p_tlv_buf = (u8 *)&p_drv_buf->flogi_param[1]; - return sizeof(p_drv_buf->flogi_param[1]); - } - break; - case DRV_TLV_LAST_FLOGI_ISSUED_COMMON_PARAMETERS_WORD_3: - if (p_drv_buf->flogi_param_set[2]) { - *p_tlv_buf = (u8 *)&p_drv_buf->flogi_param[2]; - return sizeof(p_drv_buf->flogi_param[2]); - } - break; - case DRV_TLV_LAST_FLOGI_ISSUED_COMMON_PARAMETERS_WORD_4: - if (p_drv_buf->flogi_param_set[3]) { - *p_tlv_buf = (u8 *)&p_drv_buf->flogi_param[3]; - return sizeof(p_drv_buf->flogi_param[3]); - } - break; - case DRV_TLV_LAST_FLOGI_TIMESTAMP: - if (p_drv_buf->flogi_tstamp_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->flogi_tstamp; - return sizeof(p_drv_buf->flogi_tstamp); - } - break; - case DRV_TLV_LAST_FLOGI_ACC_COMMON_PARAMETERS_WORD_1: - if (p_drv_buf->flogi_acc_param_set[0]) { - *p_tlv_buf = (u8 *)&p_drv_buf->flogi_acc_param[0]; - return sizeof(p_drv_buf->flogi_acc_param[0]); - } - break; - case DRV_TLV_LAST_FLOGI_ACC_COMMON_PARAMETERS_WORD_2: - if (p_drv_buf->flogi_acc_param_set[1]) { - *p_tlv_buf = (u8 *)&p_drv_buf->flogi_acc_param[1]; - return sizeof(p_drv_buf->flogi_acc_param[1]); - } - break; - case DRV_TLV_LAST_FLOGI_ACC_COMMON_PARAMETERS_WORD_3: - if (p_drv_buf->flogi_acc_param_set[2]) { - *p_tlv_buf = (u8 *)&p_drv_buf->flogi_acc_param[2]; - return sizeof(p_drv_buf->flogi_acc_param[2]); - } - break; - case DRV_TLV_LAST_FLOGI_ACC_COMMON_PARAMETERS_WORD_4: - if (p_drv_buf->flogi_acc_param_set[3]) { - *p_tlv_buf = (u8 *)&p_drv_buf->flogi_acc_param[3]; - return sizeof(p_drv_buf->flogi_acc_param[3]); - } - break; - case DRV_TLV_LAST_FLOGI_ACC_TIMESTAMP: - if (p_drv_buf->flogi_acc_tstamp_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->flogi_acc_tstamp; - return sizeof(p_drv_buf->flogi_acc_tstamp); - } - break; - case DRV_TLV_LAST_FLOGI_RJT: - if (p_drv_buf->flogi_rjt_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->flogi_rjt; - return sizeof(p_drv_buf->flogi_rjt); - } - break; - case DRV_TLV_LAST_FLOGI_RJT_TIMESTAMP: - if (p_drv_buf->flogi_rjt_tstamp_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->flogi_rjt_tstamp; - return sizeof(p_drv_buf->flogi_rjt_tstamp); - } - break; - case DRV_TLV_FDISCS_SENT_COUNT: - if (p_drv_buf->fdiscs_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->fdiscs; - return sizeof(p_drv_buf->fdiscs); - } - break; - case DRV_TLV_FDISC_ACCS_RECEIVED: - if (p_drv_buf->fdisc_acc_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->fdisc_acc; - return sizeof(p_drv_buf->fdisc_acc); - } - break; - case DRV_TLV_FDISC_RJTS_RECEIVED: - if (p_drv_buf->fdisc_rjt_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->fdisc_rjt; - return sizeof(p_drv_buf->fdisc_rjt); - } - break; - case DRV_TLV_PLOGI_SENT_COUNT: - if (p_drv_buf->plogi_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->plogi; - return sizeof(p_drv_buf->plogi); - } - break; - case DRV_TLV_PLOGI_ACCS_RECEIVED: - if (p_drv_buf->plogi_acc_set) { - *p_tlv_buf = (u8 
*)&p_drv_buf->plogi_acc; - return sizeof(p_drv_buf->plogi_acc); - } - break; - case DRV_TLV_PLOGI_RJTS_RECEIVED: - if (p_drv_buf->plogi_rjt_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->plogi_rjt; - return sizeof(p_drv_buf->plogi_rjt); - } - break; - case DRV_TLV_PLOGI_1_SENT_DESTINATION_FC_ID: - if (p_drv_buf->plogi_dst_fcid_set[0]) { - *p_tlv_buf = (u8 *)&p_drv_buf->plogi_dst_fcid[0]; - return sizeof(p_drv_buf->plogi_dst_fcid[0]); - } - break; - case DRV_TLV_PLOGI_2_SENT_DESTINATION_FC_ID: - if (p_drv_buf->plogi_dst_fcid_set[1]) { - *p_tlv_buf = (u8 *)&p_drv_buf->plogi_dst_fcid[1]; - return sizeof(p_drv_buf->plogi_dst_fcid[1]); - } - break; - case DRV_TLV_PLOGI_3_SENT_DESTINATION_FC_ID: - if (p_drv_buf->plogi_dst_fcid_set[2]) { - *p_tlv_buf = (u8 *)&p_drv_buf->plogi_dst_fcid[2]; - return sizeof(p_drv_buf->plogi_dst_fcid[2]); - } - break; - case DRV_TLV_PLOGI_4_SENT_DESTINATION_FC_ID: - if (p_drv_buf->plogi_dst_fcid_set[3]) { - *p_tlv_buf = (u8 *)&p_drv_buf->plogi_dst_fcid[3]; - return sizeof(p_drv_buf->plogi_dst_fcid[3]); - } - break; - case DRV_TLV_PLOGI_5_SENT_DESTINATION_FC_ID: - if (p_drv_buf->plogi_dst_fcid_set[4]) { - *p_tlv_buf = (u8 *)&p_drv_buf->plogi_dst_fcid[4]; - return sizeof(p_drv_buf->plogi_dst_fcid[4]); - } - break; - case DRV_TLV_PLOGI_1_TIMESTAMP: - if (p_drv_buf->plogi_tstamp_set[0]) { - *p_tlv_buf = (u8 *)&p_drv_buf->plogi_tstamp[0]; - return sizeof(p_drv_buf->plogi_tstamp[0]); - } - break; - case DRV_TLV_PLOGI_2_TIMESTAMP: - if (p_drv_buf->plogi_tstamp_set[1]) { - *p_tlv_buf = (u8 *)&p_drv_buf->plogi_tstamp[1]; - return sizeof(p_drv_buf->plogi_tstamp[1]); - } - break; - case DRV_TLV_PLOGI_3_TIMESTAMP: - if (p_drv_buf->plogi_tstamp_set[2]) { - *p_tlv_buf = (u8 *)&p_drv_buf->plogi_tstamp[2]; - return sizeof(p_drv_buf->plogi_tstamp[2]); - } - break; - case DRV_TLV_PLOGI_4_TIMESTAMP: - if (p_drv_buf->plogi_tstamp_set[3]) { - *p_tlv_buf = (u8 *)&p_drv_buf->plogi_tstamp[3]; - return sizeof(p_drv_buf->plogi_tstamp[3]); - } - break; - case DRV_TLV_PLOGI_5_TIMESTAMP: - if (p_drv_buf->plogi_tstamp_set[4]) { - *p_tlv_buf = (u8 *)&p_drv_buf->plogi_tstamp[4]; - return sizeof(p_drv_buf->plogi_tstamp[4]); - } - break; - case DRV_TLV_PLOGI_1_ACC_RECEIVED_SOURCE_FC_ID: - if (p_drv_buf->plogi_acc_src_fcid_set[0]) { - *p_tlv_buf = (u8 *)&p_drv_buf->plogi_acc_src_fcid[0]; - return sizeof(p_drv_buf->plogi_acc_src_fcid[0]); - } - break; - case DRV_TLV_PLOGI_2_ACC_RECEIVED_SOURCE_FC_ID: - if (p_drv_buf->plogi_acc_src_fcid_set[1]) { - *p_tlv_buf = (u8 *)&p_drv_buf->plogi_acc_src_fcid[1]; - return sizeof(p_drv_buf->plogi_acc_src_fcid[1]); - } - break; - case DRV_TLV_PLOGI_3_ACC_RECEIVED_SOURCE_FC_ID: - if (p_drv_buf->plogi_acc_src_fcid_set[2]) { - *p_tlv_buf = (u8 *)&p_drv_buf->plogi_acc_src_fcid[2]; - return sizeof(p_drv_buf->plogi_acc_src_fcid[2]); - } - break; - case DRV_TLV_PLOGI_4_ACC_RECEIVED_SOURCE_FC_ID: - if (p_drv_buf->plogi_acc_src_fcid_set[3]) { - *p_tlv_buf = (u8 *)&p_drv_buf->plogi_acc_src_fcid[3]; - return sizeof(p_drv_buf->plogi_acc_src_fcid[3]); - } - break; - case DRV_TLV_PLOGI_5_ACC_RECEIVED_SOURCE_FC_ID: - if (p_drv_buf->plogi_acc_src_fcid_set[4]) { - *p_tlv_buf = (u8 *)&p_drv_buf->plogi_acc_src_fcid[4]; - return sizeof(p_drv_buf->plogi_acc_src_fcid[4]); - } - break; - case DRV_TLV_PLOGI_1_ACC_TIMESTAMP: - if (p_drv_buf->plogi_acc_tstamp_set[0]) { - *p_tlv_buf = (u8 *)&p_drv_buf->plogi_acc_tstamp[0]; - return sizeof(p_drv_buf->plogi_acc_tstamp[0]); - } - break; - case DRV_TLV_PLOGI_2_ACC_TIMESTAMP: - if (p_drv_buf->plogi_acc_tstamp_set[1]) { - *p_tlv_buf = (u8 
*)&p_drv_buf->plogi_acc_tstamp[1]; - return sizeof(p_drv_buf->plogi_acc_tstamp[1]); - } - break; - case DRV_TLV_PLOGI_3_ACC_TIMESTAMP: - if (p_drv_buf->plogi_acc_tstamp_set[2]) { - *p_tlv_buf = (u8 *)&p_drv_buf->plogi_acc_tstamp[2]; - return sizeof(p_drv_buf->plogi_acc_tstamp[2]); - } - break; - case DRV_TLV_PLOGI_4_ACC_TIMESTAMP: - if (p_drv_buf->plogi_acc_tstamp_set[3]) { - *p_tlv_buf = (u8 *)&p_drv_buf->plogi_acc_tstamp[3]; - return sizeof(p_drv_buf->plogi_acc_tstamp[3]); - } - break; - case DRV_TLV_PLOGI_5_ACC_TIMESTAMP: - if (p_drv_buf->plogi_acc_tstamp_set[4]) { - *p_tlv_buf = (u8 *)&p_drv_buf->plogi_acc_tstamp[4]; - return sizeof(p_drv_buf->plogi_acc_tstamp[4]); - } - break; - case DRV_TLV_LOGOS_ISSUED: - if (p_drv_buf->tx_plogos_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->tx_plogos; - return sizeof(p_drv_buf->tx_plogos); - } - break; - case DRV_TLV_LOGO_ACCS_RECEIVED: - if (p_drv_buf->plogo_acc_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->plogo_acc; - return sizeof(p_drv_buf->plogo_acc); - } - break; - case DRV_TLV_LOGO_RJTS_RECEIVED: - if (p_drv_buf->plogo_rjt_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->plogo_rjt; - return sizeof(p_drv_buf->plogo_rjt); - } - break; - case DRV_TLV_LOGO_1_RECEIVED_SOURCE_FC_ID: - if (p_drv_buf->plogo_src_fcid_set[0]) { - *p_tlv_buf = (u8 *)&p_drv_buf->plogo_src_fcid[0]; - return sizeof(p_drv_buf->plogo_src_fcid[0]); - } - break; - case DRV_TLV_LOGO_2_RECEIVED_SOURCE_FC_ID: - if (p_drv_buf->plogo_src_fcid_set[1]) { - *p_tlv_buf = (u8 *)&p_drv_buf->plogo_src_fcid[1]; - return sizeof(p_drv_buf->plogo_src_fcid[1]); - } - break; - case DRV_TLV_LOGO_3_RECEIVED_SOURCE_FC_ID: - if (p_drv_buf->plogo_src_fcid_set[2]) { - *p_tlv_buf = (u8 *)&p_drv_buf->plogo_src_fcid[2]; - return sizeof(p_drv_buf->plogo_src_fcid[2]); - } - break; - case DRV_TLV_LOGO_4_RECEIVED_SOURCE_FC_ID: - if (p_drv_buf->plogo_src_fcid_set[3]) { - *p_tlv_buf = (u8 *)&p_drv_buf->plogo_src_fcid[3]; - return sizeof(p_drv_buf->plogo_src_fcid[3]); - } - break; - case DRV_TLV_LOGO_5_RECEIVED_SOURCE_FC_ID: - if (p_drv_buf->plogo_src_fcid_set[4]) { - *p_tlv_buf = (u8 *)&p_drv_buf->plogo_src_fcid[4]; - return sizeof(p_drv_buf->plogo_src_fcid[4]); - } - break; - case DRV_TLV_LOGO_1_TIMESTAMP: - if (p_drv_buf->plogo_tstamp_set[0]) { - *p_tlv_buf = (u8 *)&p_drv_buf->plogo_tstamp[0]; - return sizeof(p_drv_buf->plogo_tstamp[0]); - } - break; - case DRV_TLV_LOGO_2_TIMESTAMP: - if (p_drv_buf->plogo_tstamp_set[1]) { - *p_tlv_buf = (u8 *)&p_drv_buf->plogo_tstamp[1]; - return sizeof(p_drv_buf->plogo_tstamp[1]); - } - break; - case DRV_TLV_LOGO_3_TIMESTAMP: - if (p_drv_buf->plogo_tstamp_set[2]) { - *p_tlv_buf = (u8 *)&p_drv_buf->plogo_tstamp[2]; - return sizeof(p_drv_buf->plogo_tstamp[2]); - } - break; - case DRV_TLV_LOGO_4_TIMESTAMP: - if (p_drv_buf->plogo_tstamp_set[3]) { - *p_tlv_buf = (u8 *)&p_drv_buf->plogo_tstamp[3]; - return sizeof(p_drv_buf->plogo_tstamp[3]); - } - break; - case DRV_TLV_LOGO_5_TIMESTAMP: - if (p_drv_buf->plogo_tstamp_set[4]) { - *p_tlv_buf = (u8 *)&p_drv_buf->plogo_tstamp[4]; - return sizeof(p_drv_buf->plogo_tstamp[4]); - } - break; - case DRV_TLV_LOGOS_RECEIVED: - if (p_drv_buf->rx_logos_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->rx_logos; - return sizeof(p_drv_buf->rx_logos); - } - break; - case DRV_TLV_ACCS_ISSUED: - if (p_drv_buf->tx_accs_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->tx_accs; - return sizeof(p_drv_buf->tx_accs); - } - break; - case DRV_TLV_PRLIS_ISSUED: - if (p_drv_buf->tx_prlis_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->tx_prlis; - return sizeof(p_drv_buf->tx_prlis); - } - break; - case 
DRV_TLV_ACCS_RECEIVED: - if (p_drv_buf->rx_accs_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->rx_accs; - return sizeof(p_drv_buf->rx_accs); - } - break; - case DRV_TLV_ABTS_SENT_COUNT: - if (p_drv_buf->tx_abts_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->tx_abts; - return sizeof(p_drv_buf->tx_abts); - } - break; - case DRV_TLV_ABTS_ACCS_RECEIVED: - if (p_drv_buf->rx_abts_acc_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->rx_abts_acc; - return sizeof(p_drv_buf->rx_abts_acc); - } - break; - case DRV_TLV_ABTS_RJTS_RECEIVED: - if (p_drv_buf->rx_abts_rjt_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->rx_abts_rjt; - return sizeof(p_drv_buf->rx_abts_rjt); - } - break; - case DRV_TLV_ABTS_1_SENT_DESTINATION_FC_ID: - if (p_drv_buf->abts_dst_fcid_set[0]) { - *p_tlv_buf = (u8 *)&p_drv_buf->abts_dst_fcid[0]; - return sizeof(p_drv_buf->abts_dst_fcid[0]); - } - break; - case DRV_TLV_ABTS_2_SENT_DESTINATION_FC_ID: - if (p_drv_buf->abts_dst_fcid_set[1]) { - *p_tlv_buf = (u8 *)&p_drv_buf->abts_dst_fcid[1]; - return sizeof(p_drv_buf->abts_dst_fcid[1]); - } - break; - case DRV_TLV_ABTS_3_SENT_DESTINATION_FC_ID: - if (p_drv_buf->abts_dst_fcid_set[2]) { - *p_tlv_buf = (u8 *)&p_drv_buf->abts_dst_fcid[2]; - return sizeof(p_drv_buf->abts_dst_fcid[2]); - } - break; - case DRV_TLV_ABTS_4_SENT_DESTINATION_FC_ID: - if (p_drv_buf->abts_dst_fcid_set[3]) { - *p_tlv_buf = (u8 *)&p_drv_buf->abts_dst_fcid[3]; - return sizeof(p_drv_buf->abts_dst_fcid[3]); - } - break; - case DRV_TLV_ABTS_5_SENT_DESTINATION_FC_ID: - if (p_drv_buf->abts_dst_fcid_set[4]) { - *p_tlv_buf = (u8 *)&p_drv_buf->abts_dst_fcid[4]; - return sizeof(p_drv_buf->abts_dst_fcid[4]); - } - break; - case DRV_TLV_ABTS_1_TIMESTAMP: - if (p_drv_buf->abts_tstamp_set[0]) { - *p_tlv_buf = (u8 *)&p_drv_buf->abts_tstamp[0]; - return sizeof(p_drv_buf->abts_tstamp[0]); - } - break; - case DRV_TLV_ABTS_2_TIMESTAMP: - if (p_drv_buf->abts_tstamp_set[1]) { - *p_tlv_buf = (u8 *)&p_drv_buf->abts_tstamp[1]; - return sizeof(p_drv_buf->abts_tstamp[1]); - } - break; - case DRV_TLV_ABTS_3_TIMESTAMP: - if (p_drv_buf->abts_tstamp_set[2]) { - *p_tlv_buf = (u8 *)&p_drv_buf->abts_tstamp[2]; - return sizeof(p_drv_buf->abts_tstamp[2]); - } - break; - case DRV_TLV_ABTS_4_TIMESTAMP: - if (p_drv_buf->abts_tstamp_set[3]) { - *p_tlv_buf = (u8 *)&p_drv_buf->abts_tstamp[3]; - return sizeof(p_drv_buf->abts_tstamp[3]); - } - break; - case DRV_TLV_ABTS_5_TIMESTAMP: - if (p_drv_buf->abts_tstamp_set[4]) { - *p_tlv_buf = (u8 *)&p_drv_buf->abts_tstamp[4]; - return sizeof(p_drv_buf->abts_tstamp[4]); - } - break; - case DRV_TLV_RSCNS_RECEIVED: - if (p_drv_buf->rx_rscn_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->rx_rscn; - return sizeof(p_drv_buf->rx_rscn); - } - break; - case DRV_TLV_LAST_RSCN_RECEIVED_N_PORT_1: - if (p_drv_buf->rx_rscn_nport_set[0]) { - *p_tlv_buf = (u8 *)&p_drv_buf->rx_rscn_nport[0]; - return sizeof(p_drv_buf->rx_rscn_nport[0]); - } - break; - case DRV_TLV_LAST_RSCN_RECEIVED_N_PORT_2: - if (p_drv_buf->rx_rscn_nport_set[1]) { - *p_tlv_buf = (u8 *)&p_drv_buf->rx_rscn_nport[1]; - return sizeof(p_drv_buf->rx_rscn_nport[1]); - } - break; - case DRV_TLV_LAST_RSCN_RECEIVED_N_PORT_3: - if (p_drv_buf->rx_rscn_nport_set[2]) { - *p_tlv_buf = (u8 *)&p_drv_buf->rx_rscn_nport[2]; - return sizeof(p_drv_buf->rx_rscn_nport[2]); - } - break; - case DRV_TLV_LAST_RSCN_RECEIVED_N_PORT_4: - if (p_drv_buf->rx_rscn_nport_set[3]) { - *p_tlv_buf = (u8 *)&p_drv_buf->rx_rscn_nport[3]; - return sizeof(p_drv_buf->rx_rscn_nport[3]); - } - break; - case DRV_TLV_LUN_RESETS_ISSUED: - if (p_drv_buf->tx_lun_rst_set) { - *p_tlv_buf = (u8 
*)&p_drv_buf->tx_lun_rst; - return sizeof(p_drv_buf->tx_lun_rst); - } - break; - case DRV_TLV_ABORT_TASK_SETS_ISSUED: - if (p_drv_buf->abort_task_sets_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->abort_task_sets; - return sizeof(p_drv_buf->abort_task_sets); - } - break; - case DRV_TLV_TPRLOS_SENT: - if (p_drv_buf->tx_tprlos_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->tx_tprlos; - return sizeof(p_drv_buf->tx_tprlos); - } - break; - case DRV_TLV_NOS_SENT_COUNT: - if (p_drv_buf->tx_nos_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->tx_nos; - return sizeof(p_drv_buf->tx_nos); - } - break; - case DRV_TLV_NOS_RECEIVED_COUNT: - if (p_drv_buf->rx_nos_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->rx_nos; - return sizeof(p_drv_buf->rx_nos); - } - break; - case DRV_TLV_OLS_COUNT: - if (p_drv_buf->ols_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->ols; - return sizeof(p_drv_buf->ols); - } - break; - case DRV_TLV_LR_COUNT: - if (p_drv_buf->lr_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->lr; - return sizeof(p_drv_buf->lr); - } - break; - case DRV_TLV_LRR_COUNT: - if (p_drv_buf->lrr_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->lrr; - return sizeof(p_drv_buf->lrr); - } - break; - case DRV_TLV_LIP_SENT_COUNT: - if (p_drv_buf->tx_lip_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->tx_lip; - return sizeof(p_drv_buf->tx_lip); - } - break; - case DRV_TLV_LIP_RECEIVED_COUNT: - if (p_drv_buf->rx_lip_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->rx_lip; - return sizeof(p_drv_buf->rx_lip); - } - break; - case DRV_TLV_EOFA_COUNT: - if (p_drv_buf->eofa_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->eofa; - return sizeof(p_drv_buf->eofa); - } - break; - case DRV_TLV_EOFNI_COUNT: - if (p_drv_buf->eofni_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->eofni; - return sizeof(p_drv_buf->eofni); - } - break; - case DRV_TLV_SCSI_STATUS_CHECK_CONDITION_COUNT: - if (p_drv_buf->scsi_chks_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->scsi_chks; - return sizeof(p_drv_buf->scsi_chks); - } - break; - case DRV_TLV_SCSI_STATUS_CONDITION_MET_COUNT: - if (p_drv_buf->scsi_cond_met_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->scsi_cond_met; - return sizeof(p_drv_buf->scsi_cond_met); - } - break; - case DRV_TLV_SCSI_STATUS_BUSY_COUNT: - if (p_drv_buf->scsi_busy_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->scsi_busy; - return sizeof(p_drv_buf->scsi_busy); - } - break; - case DRV_TLV_SCSI_STATUS_INTERMEDIATE_COUNT: - if (p_drv_buf->scsi_inter_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->scsi_inter; - return sizeof(p_drv_buf->scsi_inter); - } - break; - case DRV_TLV_SCSI_STATUS_INTERMEDIATE_CONDITION_MET_COUNT: - if (p_drv_buf->scsi_inter_cond_met_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->scsi_inter_cond_met; - return sizeof(p_drv_buf->scsi_inter_cond_met); - } - break; - case DRV_TLV_SCSI_STATUS_RESERVATION_CONFLICT_COUNT: - if (p_drv_buf->scsi_rsv_conflicts_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->scsi_rsv_conflicts; - return sizeof(p_drv_buf->scsi_rsv_conflicts); - } - break; - case DRV_TLV_SCSI_STATUS_TASK_SET_FULL_COUNT: - if (p_drv_buf->scsi_tsk_full_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->scsi_tsk_full; - return sizeof(p_drv_buf->scsi_tsk_full); - } - break; - case DRV_TLV_SCSI_STATUS_ACA_ACTIVE_COUNT: - if (p_drv_buf->scsi_aca_active_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->scsi_aca_active; - return sizeof(p_drv_buf->scsi_aca_active); - } - break; - case DRV_TLV_SCSI_STATUS_TASK_ABORTED_COUNT: - if (p_drv_buf->scsi_tsk_abort_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->scsi_tsk_abort; - return sizeof(p_drv_buf->scsi_tsk_abort); - } - break; - case DRV_TLV_SCSI_CHECK_CONDITION_1_RECEIVED_SK_ASC_ASCQ: - if 
(p_drv_buf->scsi_rx_chk_set[0]) { - *p_tlv_buf = (u8 *)&p_drv_buf->scsi_rx_chk[0]; - return sizeof(p_drv_buf->scsi_rx_chk[0]); - } - break; - case DRV_TLV_SCSI_CHECK_CONDITION_2_RECEIVED_SK_ASC_ASCQ: - if (p_drv_buf->scsi_rx_chk_set[1]) { - *p_tlv_buf = (u8 *)&p_drv_buf->scsi_rx_chk[1]; - return sizeof(p_drv_buf->scsi_rx_chk[1]); - } - break; - case DRV_TLV_SCSI_CHECK_CONDITION_3_RECEIVED_SK_ASC_ASCQ: - if (p_drv_buf->scsi_rx_chk_set[2]) { - *p_tlv_buf = (u8 *)&p_drv_buf->scsi_rx_chk[2]; - return sizeof(p_drv_buf->scsi_rx_chk[2]); - } - break; - case DRV_TLV_SCSI_CHECK_CONDITION_4_RECEIVED_SK_ASC_ASCQ: - if (p_drv_buf->scsi_rx_chk_set[3]) { - *p_tlv_buf = (u8 *)&p_drv_buf->scsi_rx_chk[3]; - return sizeof(p_drv_buf->scsi_rx_chk[4]); - } - break; - case DRV_TLV_SCSI_CHECK_CONDITION_5_RECEIVED_SK_ASC_ASCQ: - if (p_drv_buf->scsi_rx_chk_set[4]) { - *p_tlv_buf = (u8 *)&p_drv_buf->scsi_rx_chk[4]; - return sizeof(p_drv_buf->scsi_rx_chk[4]); - } - break; - case DRV_TLV_SCSI_CHECK_1_TIMESTAMP: - if (p_drv_buf->scsi_chk_tstamp_set[0]) { - *p_tlv_buf = (u8 *)&p_drv_buf->scsi_chk_tstamp[0]; - return sizeof(p_drv_buf->scsi_chk_tstamp[0]); - } - break; - case DRV_TLV_SCSI_CHECK_2_TIMESTAMP: - if (p_drv_buf->scsi_chk_tstamp_set[1]) { - *p_tlv_buf = (u8 *)&p_drv_buf->scsi_chk_tstamp[1]; - return sizeof(p_drv_buf->scsi_chk_tstamp[1]); - } - break; - case DRV_TLV_SCSI_CHECK_3_TIMESTAMP: - if (p_drv_buf->scsi_chk_tstamp_set[2]) { - *p_tlv_buf = (u8 *)&p_drv_buf->scsi_chk_tstamp[2]; - return sizeof(p_drv_buf->scsi_chk_tstamp[2]); - } - break; - case DRV_TLV_SCSI_CHECK_4_TIMESTAMP: - if (p_drv_buf->scsi_chk_tstamp_set[3]) { - *p_tlv_buf = (u8 *)&p_drv_buf->scsi_chk_tstamp[3]; - return sizeof(p_drv_buf->scsi_chk_tstamp[3]); - } - break; - case DRV_TLV_SCSI_CHECK_5_TIMESTAMP: - if (p_drv_buf->scsi_chk_tstamp_set[4]) { - *p_tlv_buf = (u8 *)&p_drv_buf->scsi_chk_tstamp[4]; - return sizeof(p_drv_buf->scsi_chk_tstamp[4]); - } - break; - default: - break; - } - - return -1; -} - -static int -ecore_mfw_get_iscsi_tlv_value(struct ecore_drv_tlv_hdr *p_tlv, - struct ecore_mfw_tlv_iscsi *p_drv_buf, - u8 **p_tlv_buf) -{ - switch (p_tlv->tlv_type) { - case DRV_TLV_TARGET_LLMNR_ENABLED: - if (p_drv_buf->target_llmnr_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->target_llmnr; - return sizeof(p_drv_buf->target_llmnr); - } - break; - case DRV_TLV_HEADER_DIGEST_FLAG_ENABLED: - if (p_drv_buf->header_digest_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->header_digest; - return sizeof(p_drv_buf->header_digest); - } - break; - case DRV_TLV_DATA_DIGEST_FLAG_ENABLED: - if (p_drv_buf->data_digest_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->data_digest; - return sizeof(p_drv_buf->data_digest); - } - break; - case DRV_TLV_AUTHENTICATION_METHOD: - if (p_drv_buf->auth_method_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->auth_method; - return sizeof(p_drv_buf->auth_method); - } - break; - case DRV_TLV_ISCSI_BOOT_TARGET_PORTAL: - if (p_drv_buf->boot_taget_portal_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->boot_taget_portal; - return sizeof(p_drv_buf->boot_taget_portal); - } - break; - case DRV_TLV_MAX_FRAME_SIZE: - if (p_drv_buf->frame_size_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->frame_size; - return sizeof(p_drv_buf->frame_size); - } - break; - case DRV_TLV_PDU_TX_DESCRIPTORS_QUEUE_SIZE: - if (p_drv_buf->tx_desc_size_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->tx_desc_size; - return sizeof(p_drv_buf->tx_desc_size); - } - break; - case DRV_TLV_PDU_RX_DESCRIPTORS_QUEUE_SIZE: - if (p_drv_buf->rx_desc_size_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->rx_desc_size; - return 
sizeof(p_drv_buf->rx_desc_size); - } - break; - case DRV_TLV_ISCSI_BOOT_PROGRESS: - if (p_drv_buf->boot_progress_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->boot_progress; - return sizeof(p_drv_buf->boot_progress); - } - break; - case DRV_TLV_PDU_TX_DESCRIPTOR_QUEUE_AVG_DEPTH: - if (p_drv_buf->tx_desc_qdepth_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->tx_desc_qdepth; - return sizeof(p_drv_buf->tx_desc_qdepth); - } - break; - case DRV_TLV_PDU_RX_DESCRIPTORS_QUEUE_AVG_DEPTH: - if (p_drv_buf->rx_desc_qdepth_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->rx_desc_qdepth; - return sizeof(p_drv_buf->rx_desc_qdepth); - } - break; - case DRV_TLV_ISCSI_PDU_RX_FRAMES_RECEIVED: - if (p_drv_buf->rx_frames_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->rx_frames; - return sizeof(p_drv_buf->rx_frames); - } - break; - case DRV_TLV_ISCSI_PDU_RX_BYTES_RECEIVED: - if (p_drv_buf->rx_bytes_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->rx_bytes; - return sizeof(p_drv_buf->rx_bytes); - } - break; - case DRV_TLV_ISCSI_PDU_TX_FRAMES_SENT: - if (p_drv_buf->tx_frames_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->tx_frames; - return sizeof(p_drv_buf->tx_frames); - } - break; - case DRV_TLV_ISCSI_PDU_TX_BYTES_SENT: - if (p_drv_buf->tx_bytes_set) { - *p_tlv_buf = (u8 *)&p_drv_buf->tx_bytes; - return sizeof(p_drv_buf->tx_bytes); - } - break; - default: - break; - } - - return -1; -} - -static enum _ecore_status_t ecore_mfw_update_tlvs(struct ecore_hwfn *p_hwfn, - u8 tlv_group, u8 *p_mfw_buf, - u32 size) -{ - union ecore_mfw_tlv_data *p_tlv_data; - struct ecore_drv_tlv_hdr tlv; - u8 *p_tlv_ptr = OSAL_NULL, *p_temp; - u32 offset; - int len; - - p_tlv_data = OSAL_VZALLOC(p_hwfn->p_dev, sizeof(*p_tlv_data)); - if (!p_tlv_data) - return ECORE_NOMEM; - - if (OSAL_MFW_FILL_TLV_DATA(p_hwfn, tlv_group, p_tlv_data)) { - OSAL_VFREE(p_hwfn->p_dev, p_tlv_data); - return ECORE_INVAL; - } - - offset = 0; - OSAL_MEMSET(&tlv, 0, sizeof(tlv)); - while (offset < size) { - p_temp = &p_mfw_buf[offset]; - tlv.tlv_type = TLV_TYPE(p_temp); - tlv.tlv_length = TLV_LENGTH(p_temp); - tlv.tlv_flags = TLV_FLAGS(p_temp); - DP_INFO(p_hwfn, "Type %d length = %d flags = 0x%x\n", - tlv.tlv_type, tlv.tlv_length, tlv.tlv_flags); - - offset += sizeof(tlv); - if (tlv_group == ECORE_MFW_TLV_GENERIC) - len = ecore_mfw_get_gen_tlv_value(&tlv, - &p_tlv_data->generic, &p_tlv_ptr); - else if (tlv_group == ECORE_MFW_TLV_ETH) - len = ecore_mfw_get_eth_tlv_value(&tlv, - &p_tlv_data->eth, &p_tlv_ptr); - else if (tlv_group == ECORE_MFW_TLV_FCOE) - len = ecore_mfw_get_fcoe_tlv_value(&tlv, - &p_tlv_data->fcoe, &p_tlv_ptr); - else - len = ecore_mfw_get_iscsi_tlv_value(&tlv, - &p_tlv_data->iscsi, &p_tlv_ptr); - - if (len > 0) { - OSAL_WARN(len > 4 * tlv.tlv_length, - "Incorrect MFW TLV length"); - len = OSAL_MIN_T(int, len, 4 * tlv.tlv_length); - tlv.tlv_flags |= ECORE_DRV_TLV_FLAGS_CHANGED; - /* TODO: Endianness handling? 
*/ - OSAL_MEMCPY(p_mfw_buf, &tlv, sizeof(tlv)); - OSAL_MEMCPY(p_mfw_buf + offset, p_tlv_ptr, len); - } - - offset += sizeof(u32) * tlv.tlv_length; - } - - OSAL_VFREE(p_hwfn->p_dev, p_tlv_data); - - return ECORE_SUCCESS; -} - -enum _ecore_status_t -ecore_mfw_process_tlv_req(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt) -{ - u32 addr, size, offset, resp, param, val; - u8 tlv_group = 0, id, *p_mfw_buf = OSAL_NULL, *p_temp; - u32 global_offsize, global_addr; - enum _ecore_status_t rc; - struct ecore_drv_tlv_hdr tlv; - - addr = SECTION_OFFSIZE_ADDR(p_hwfn->mcp_info->public_base, - PUBLIC_GLOBAL); - global_offsize = ecore_rd(p_hwfn, p_ptt, addr); - global_addr = SECTION_ADDR(global_offsize, 0); - addr = global_addr + OFFSETOF(struct public_global, data_ptr); - size = ecore_rd(p_hwfn, p_ptt, global_addr + - OFFSETOF(struct public_global, data_size)); - - if (!size) { - DP_NOTICE(p_hwfn, false, "Invalid TLV req size = %d\n", size); - goto drv_done; - } - - p_mfw_buf = (void *)OSAL_VZALLOC(p_hwfn->p_dev, size); - if (!p_mfw_buf) { - DP_NOTICE(p_hwfn, false, - "Failed allocate memory for p_mfw_buf\n"); - goto drv_done; - } - - /* Read the TLV request to local buffer */ - for (offset = 0; offset < size; offset += sizeof(u32)) { - val = ecore_rd(p_hwfn, p_ptt, addr + offset); - OSAL_MEMCPY(&p_mfw_buf[offset], &val, sizeof(u32)); - } - - /* Parse the headers to enumerate the requested TLV groups */ - for (offset = 0; offset < size; - offset += sizeof(tlv) + sizeof(u32) * tlv.tlv_length) { - p_temp = &p_mfw_buf[offset]; - tlv.tlv_type = TLV_TYPE(p_temp); - tlv.tlv_length = TLV_LENGTH(p_temp); - if (ecore_mfw_get_tlv_group(tlv.tlv_type, &tlv_group)) - goto drv_done; - } - - /* Update the TLV values in the local buffer */ - for (id = ECORE_MFW_TLV_GENERIC; id < ECORE_MFW_TLV_MAX; id <<= 1) { - if (tlv_group & id) { - if (ecore_mfw_update_tlvs(p_hwfn, id, p_mfw_buf, size)) - goto drv_done; - } - } - - /* Write the TLV data to shared memory */ - for (offset = 0; offset < size; offset += sizeof(u32)) { - val = (u32)p_mfw_buf[offset]; - ecore_wr(p_hwfn, p_ptt, addr + offset, val); - offset += sizeof(u32); - } - -drv_done: - rc = ecore_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_GET_TLV_DONE, 0, &resp, - &param); - - OSAL_VFREE(p_hwfn->p_dev, p_mfw_buf); - - return rc; -} From 4e8a41acaffb0e96bfdd65e4b0c31d2cee6460dc Mon Sep 17 00:00:00 2001 From: Tejasree Kondoj Date: Tue, 11 Nov 2025 16:46:43 +0530 Subject: [PATCH 48/99] crypto/cnxk: support CPT CQ for CN20K Adding CPT CQ support for cn20k.
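
For reference, the CQ sizing implied by the new queue pair setup works out as follows (a sketch; the constants are taken from the diff below, and `nb_desc` stands for the configured queue depth):

```
/* cn20k CQ sizing sketch: cq_entry_size = 2 selects 128-byte entries,
 * because entry size = ROC_CPT_CQ_ENTRY_SIZE_UNIT << cq_entry_size
 *                    = 32 << 2 = 128 bytes.
 */
uint32_t entry_size = ROC_CPT_CQ_ENTRY_SIZE_UNIT << 2;
size_t cq_mem = PLT_ALIGN((size_t)nb_desc * entry_size, ROC_ALIGN);
```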
Signed-off-by: Tejasree Kondoj --- drivers/common/cnxk/roc_cpt.c | 3 +- drivers/common/cnxk/roc_cpt.h | 2 + drivers/crypto/cnxk/cn20k_cryptodev_ops.c | 5 +- drivers/crypto/cnxk/cnxk_cryptodev_ops.c | 83 ++++++++++++++++------- drivers/crypto/cnxk/cnxk_cryptodev_ops.h | 5 +- 5 files changed, 67 insertions(+), 31 deletions(-) diff --git a/drivers/common/cnxk/roc_cpt.c b/drivers/common/cnxk/roc_cpt.c index 4e610109b47..83e0c9896b0 100644 --- a/drivers/common/cnxk/roc_cpt.c +++ b/drivers/common/cnxk/roc_cpt.c @@ -24,7 +24,6 @@ #define CPT_LF_MAX_NB_DESC 128000 #define CPT_LF_DEFAULT_NB_DESC 1024 #define CPT_LF_FC_MIN_THRESHOLD 32 -#define CQ_ENTRY_SIZE_UNIT 32 static struct cpt_int_cb { roc_cpt_int_misc_cb_t cb; @@ -704,7 +703,7 @@ cpt_lf_cq_init(struct roc_cpt_lf *lf) roc_cpt_cq_disable(lf); /* Set command queue base address */ - len = PLT_ALIGN(lf->cq_size * (CQ_ENTRY_SIZE_UNIT << lf->cq_entry_size), ROC_ALIGN); + len = PLT_ALIGN(lf->cq_size * (ROC_CPT_CQ_ENTRY_SIZE_UNIT << lf->cq_entry_size), ROC_ALIGN); lf->cq_vaddr = plt_zmalloc(len, ROC_ALIGN); if (lf->cq_vaddr == NULL) return -ENOMEM; diff --git a/drivers/common/cnxk/roc_cpt.h b/drivers/common/cnxk/roc_cpt.h index 41a681e2a53..67956758be8 100644 --- a/drivers/common/cnxk/roc_cpt.h +++ b/drivers/common/cnxk/roc_cpt.h @@ -133,6 +133,8 @@ #define ROC_CPTR_CACHE_LINE_SZ 256 #define ROC_CPTR_ALIGN ROC_CPTR_CACHE_LINE_SZ +#define ROC_CPT_CQ_ENTRY_SIZE_UNIT 32 + enum { ROC_CPT_REVISION_ID_83XX = 0, ROC_CPT_REVISION_ID_96XX_B0 = 1, diff --git a/drivers/crypto/cnxk/cn20k_cryptodev_ops.c b/drivers/crypto/cnxk/cn20k_cryptodev_ops.c index d56f6b9d63f..18100ff1f82 100644 --- a/drivers/crypto/cnxk/cn20k_cryptodev_ops.c +++ b/drivers/crypto/cnxk/cn20k_cryptodev_ops.c @@ -261,8 +261,9 @@ cn20k_cpt_fill_inst(struct cnxk_cpt_qp *qp, struct rte_crypto_op *ops[], struct } inst[0].res_addr = (uint64_t)&infl_req->res; - rte_atomic_store_explicit((RTE_ATOMIC(uint64_t) *)(&infl_req->res.u64[0]), - res.u64[0], rte_memory_order_relaxed); + inst[0].cq_ena = 1; + rte_atomic_store_explicit((RTE_ATOMIC(uint64_t) *)(&infl_req->res.u64[0]), res.u64[0], + rte_memory_order_relaxed); infl_req->cop = op; inst[0].w7.u64 = w7; diff --git a/drivers/crypto/cnxk/cnxk_cryptodev_ops.c b/drivers/crypto/cnxk/cnxk_cryptodev_ops.c index 7dc4c684cc6..370f311dd37 100644 --- a/drivers/crypto/cnxk/cnxk_cryptodev_ops.c +++ b/drivers/crypto/cnxk/cnxk_cryptodev_ops.c @@ -199,14 +199,19 @@ cnxk_cpt_dev_start(struct rte_cryptodev *dev) struct cnxk_cpt_vf *vf = dev->data->dev_private; struct roc_cpt *roc_cpt = &vf->cpt; uint16_t nb_lf = roc_cpt->nb_lf; + struct roc_cpt_lf *lf; uint16_t qp_id; for (qp_id = 0; qp_id < nb_lf; qp_id++) { + lf = vf->cpt.lf[qp_id]; + /* Application may not setup all queue pair */ - if (roc_cpt->lf[qp_id] == NULL) + if (lf == NULL) continue; - roc_cpt_iq_enable(roc_cpt->lf[qp_id]); + roc_cpt_iq_enable(lf); + if (lf->cpt_cq_ena) + roc_cpt_cq_enable(lf); } return 0; @@ -218,13 +223,17 @@ cnxk_cpt_dev_stop(struct rte_cryptodev *dev) struct cnxk_cpt_vf *vf = dev->data->dev_private; struct roc_cpt *roc_cpt = &vf->cpt; uint16_t nb_lf = roc_cpt->nb_lf; + struct roc_cpt_lf *lf; uint16_t qp_id; for (qp_id = 0; qp_id < nb_lf; qp_id++) { - if (roc_cpt->lf[qp_id] == NULL) + lf = vf->cpt.lf[qp_id]; + if (lf == NULL) continue; roc_cpt_iq_disable(roc_cpt->lf[qp_id]); + if (lf->cpt_cq_ena) + roc_cpt_cq_disable(lf); } } @@ -347,7 +356,7 @@ static struct cnxk_cpt_qp * cnxk_cpt_qp_create(const struct rte_cryptodev *dev, uint16_t qp_id, uint32_t iq_len) { - const struct rte_memzone 
*pq_mem; + const struct rte_memzone *pq_mem = NULL; char name[RTE_MEMZONE_NAMESIZE]; struct cnxk_cpt_qp *qp; uint32_t len; @@ -363,23 +372,25 @@ cnxk_cpt_qp_create(const struct rte_cryptodev *dev, uint16_t qp_id, } /* For pending queue */ - len = iq_len * sizeof(struct cpt_inflight_req); + if (!roc_model_is_cn20k()) { + len = iq_len * sizeof(struct cpt_inflight_req); - qp_memzone_name_get(name, RTE_MEMZONE_NAMESIZE, dev->data->dev_id, - qp_id); + qp_memzone_name_get(name, RTE_MEMZONE_NAMESIZE, dev->data->dev_id, qp_id); - pq_mem = rte_memzone_reserve_aligned(name, len, rte_socket_id(), - RTE_MEMZONE_SIZE_HINT_ONLY | - RTE_MEMZONE_256MB, - RTE_CACHE_LINE_SIZE); - if (pq_mem == NULL) { - plt_err("Could not allocate reserved memzone"); - goto qp_free; - } + pq_mem = rte_memzone_reserve_aligned(name, len, rte_socket_id(), + RTE_MEMZONE_SIZE_HINT_ONLY | RTE_MEMZONE_256MB, + RTE_CACHE_LINE_SIZE); + if (pq_mem == NULL) { + plt_err("Could not allocate reserved memzone"); + goto qp_free; + } - va = pq_mem->addr; + va = pq_mem->addr; - memset(va, 0, len); + memset(va, 0, len); + + qp->pend_q.req_queue = pq_mem->addr; + } ret = cnxk_cpt_metabuf_mempool_create(dev, qp, qp_id, iq_len); if (ret) { @@ -388,14 +399,14 @@ cnxk_cpt_qp_create(const struct rte_cryptodev *dev, uint16_t qp_id, } /* Initialize pending queue */ - qp->pend_q.req_queue = pq_mem->addr; qp->pend_q.head = 0; qp->pend_q.tail = 0; return qp; pq_mem_free: - rte_memzone_free(pq_mem); + if (!roc_model_is_cn20k()) + rte_memzone_free(pq_mem); qp_free: rte_free(qp); return NULL; @@ -410,14 +421,15 @@ cnxk_cpt_qp_destroy(const struct rte_cryptodev *dev, struct cnxk_cpt_qp *qp) cnxk_cpt_metabuf_mempool_destroy(qp); - qp_memzone_name_get(name, RTE_MEMZONE_NAMESIZE, dev->data->dev_id, - qp->lf.lf_id); + if (!roc_model_is_cn20k()) { + qp_memzone_name_get(name, RTE_MEMZONE_NAMESIZE, dev->data->dev_id, qp->lf.lf_id); - pq_mem = rte_memzone_lookup(name); + pq_mem = rte_memzone_lookup(name); - ret = rte_memzone_free(pq_mem); - if (ret) - return ret; + ret = rte_memzone_free(pq_mem); + if (ret) + return ret; + } rte_free(qp); @@ -487,6 +499,13 @@ cnxk_cpt_queue_pair_setup(struct rte_cryptodev *dev, uint16_t qp_id, qp->lf.lf_id = qp_id; qp->lf.nb_desc = nb_desc; + if (roc_model_is_cn20k()) { + qp->lf.cpt_cq_ena = true; + qp->lf.dq_ack_ena = false; + /* CQ entry size is 128B(32 << 2) */ + qp->lf.cq_entry_size = 2; + qp->lf.cq_size = nb_desc; + } ret = roc_cpt_lf_init(roc_cpt, &qp->lf); if (ret < 0) { @@ -497,6 +516,17 @@ cnxk_cpt_queue_pair_setup(struct rte_cryptodev *dev, uint16_t qp_id, qp->pend_q.pq_mask = qp->lf.nb_desc - 1; + if (roc_model_is_cn20k()) { + if (qp->lf.cq_vaddr == NULL) { + plt_err("Could not initialize completion queue"); + ret = -EINVAL; + goto exit; + } + + qp->pend_q.req_queue = PLT_PTR_ADD( + qp->lf.cq_vaddr, ROC_CPT_CQ_ENTRY_SIZE_UNIT << qp->lf.cq_entry_size); + } + roc_cpt->lf[qp_id] = &qp->lf; ret = roc_cpt_lmtline_init(roc_cpt, &qp->lmtline, qp_id, true); @@ -544,6 +574,9 @@ cnxk_cpt_queue_pair_reset(struct rte_cryptodev *dev, uint16_t qp_id, roc_cpt_lf_reset(lf); roc_cpt_iq_enable(lf); + if (lf->cpt_cq_ena) + roc_cpt_cq_enable(lf); + return 0; } diff --git a/drivers/crypto/cnxk/cnxk_cryptodev_ops.h b/drivers/crypto/cnxk/cnxk_cryptodev_ops.h index 03af1029ced..32fc7a26fce 100644 --- a/drivers/crypto/cnxk/cnxk_cryptodev_ops.h +++ b/drivers/crypto/cnxk/cnxk_cryptodev_ops.h @@ -54,13 +54,15 @@ struct cpt_qp_meta_info { struct __rte_aligned(ROC_ALIGN) cpt_inflight_req { union cpt_res_s res; + uint8_t rsvd[16]; + uint8_t 
meta[META_LEN]; union { void *opaque; struct rte_crypto_op *cop; struct rte_event_vector *vec; }; + void *qp; void *mdata; - uint8_t meta[META_LEN]; uint8_t op_flags; #ifdef CPT_INST_DEBUG_ENABLE uint8_t scatter_sz; @@ -69,7 +71,6 @@ struct __rte_aligned(ROC_ALIGN) cpt_inflight_req { uint8_t *dptr; uint8_t *rptr; #endif - void *qp; }; PLT_STATIC_ASSERT(sizeof(struct cpt_inflight_req) == ROC_CACHE_LINE_SZ); From 3ffcfc48040a076643ec58c5f53b77069305afda Mon Sep 17 00:00:00 2001 From: Emma Finn Date: Wed, 12 Nov 2025 10:19:10 +0000 Subject: [PATCH 49/99] crypto/qat: fix CCM request descriptor hash state size The hash_state_sz field in the QAT request descriptor for AES-CCM operations was incorrectly set to the digest length. According to the QAT firmware specification, this field should contain the length of AAD in quadwords for CCM operations. Fixes: ab56c4d9ed9a ("crypto/qat: support AES-CCM") Cc: stable@dpdk.org Signed-off-by: Emma Finn --- drivers/crypto/qat/qat_sym_session.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/qat/qat_sym_session.c b/drivers/crypto/qat/qat_sym_session.c index 98ab82ffb38..2ba4fc0a3a5 100644 --- a/drivers/crypto/qat/qat_sym_session.c +++ b/drivers/crypto/qat/qat_sym_session.c @@ -2774,7 +2774,8 @@ static int qat_sym_cd_auth_set(struct qat_sym_session *cdesc, hash->auth_counter.counter = 0; hash_cd_ctrl->outer_prefix_sz = digestsize; - auth_param->hash_state_sz = digestsize; + auth_param->hash_state_sz = (RTE_ALIGN_CEIL(auth_param->u2.aad_sz, + ICP_QAT_HW_CCM_AAD_ALIGNMENT) >> 3); memcpy(cdesc->cd_cur_ptr + state1_size, authkey, authkeylen); break; From 2c5b18a3ba6676d3d7e6fd2985d12b023350be19 Mon Sep 17 00:00:00 2001 From: Emma Finn Date: Fri, 14 Nov 2025 16:20:35 +0000 Subject: [PATCH 50/99] test/crypto: add AES-CCM with 32-byte AAD Add new test case that validates AES-CCM operations with a 32-byte AAD. This test case specifically verifies the handling of larger AAD length with smaller authentication tags. 
Signed-off-by: Emma Finn --- app/test/test_cryptodev.c | 16 +++++++ app/test/test_cryptodev_aead_test_vectors.h | 53 +++++++++++++++++++++ 2 files changed, 69 insertions(+) diff --git a/app/test/test_cryptodev.c b/app/test/test_cryptodev.c index 6e5f308e551..9bdd3577270 100644 --- a/app/test/test_cryptodev.c +++ b/app/test/test_cryptodev.c @@ -14271,6 +14271,12 @@ test_AES_CCM_authenticated_encryption_test_case_128_3(void) return test_authenticated_encryption(&ccm_test_case_128_3); } +static int +test_AES_CCM_authenticated_encryption_test_case_128_4(void) +{ + return test_authenticated_encryption(&ccm_test_case_128_4); +} + static int test_AES_CCM_authenticated_decryption_test_case_128_1(void) { @@ -14289,6 +14295,12 @@ test_AES_CCM_authenticated_decryption_test_case_128_3(void) return test_authenticated_decryption(&ccm_test_case_128_3); } +static int +test_AES_CCM_authenticated_decryption_test_case_128_4(void) +{ + return test_authenticated_decryption(&ccm_test_case_128_4); +} + static int test_AES_CCM_authenticated_encryption_test_case_192_1(void) { @@ -18975,6 +18987,8 @@ static struct unit_test_suite cryptodev_aes_ccm_auth_testsuite = { test_AES_CCM_authenticated_encryption_test_case_128_2), TEST_CASE_ST(ut_setup, ut_teardown, test_AES_CCM_authenticated_encryption_test_case_128_3), + TEST_CASE_ST(ut_setup, ut_teardown, + test_AES_CCM_authenticated_encryption_test_case_128_4), /** AES CCM Authenticated Decryption 128 bits key*/ TEST_CASE_ST(ut_setup, ut_teardown, @@ -18983,6 +18997,8 @@ static struct unit_test_suite cryptodev_aes_ccm_auth_testsuite = { test_AES_CCM_authenticated_decryption_test_case_128_2), TEST_CASE_ST(ut_setup, ut_teardown, test_AES_CCM_authenticated_decryption_test_case_128_3), + TEST_CASE_ST(ut_setup, ut_teardown, + test_AES_CCM_authenticated_decryption_test_case_128_4), /** AES CCM Authenticated Encryption 192 bits key */ TEST_CASE_ST(ut_setup, ut_teardown, diff --git a/app/test/test_cryptodev_aead_test_vectors.h b/app/test/test_cryptodev_aead_test_vectors.h index 73bedaf557b..6b9642e339a 100644 --- a/app/test/test_cryptodev_aead_test_vectors.h +++ b/app/test/test_cryptodev_aead_test_vectors.h @@ -50,6 +50,13 @@ static uint8_t ccm_aad_test_2[22] = { 0xA5, 0xB8, 0xFC, 0xBA, 0x00, 0x00 }; +static uint8_t ccm_aad_test_4[32] = { + 0xD2, 0x3D, 0xBD, 0xF2, 0x13, 0xAA, 0xD5, 0x03, + 0xDB, 0xA5, 0x3E, 0x44, 0xBB, 0x1E, 0xD7, 0x19, + 0x65, 0x2F, 0x37, 0xA0, 0x97, 0xD6, 0x5F, 0x59, + 0x0B, 0xD1, 0xE6, 0xA4, 0xCC, 0x50, 0xEB, 0x62 +}; + static uint8_t sm4_gcm_aad_test_2[] = { 0x3f, 0x89, 0x42, 0x20 }; @@ -3714,6 +3721,52 @@ static const struct aead_test_data ccm_test_case_128_3 = { } }; +static const struct aead_test_data ccm_test_case_128_4 = { + .algo = RTE_CRYPTO_AEAD_AES_CCM, + .key = { + .data = { + 0x2F, 0x55, 0xF2, 0xF8, 0x4E, 0x66, 0x5E, 0x4B, + 0x1D, 0x56, 0x91, 0x62, 0x6C, 0xE7, 0xD4, 0xB8 + }, + .len = 16 + }, + .iv = { + .data = { + 0x2C, 0x39, 0xBE, 0x24, 0xAF, 0xF4, 0xC1, 0x0E, + 0x20, 0x26, 0x11, 0x49, 0x49 + }, + .len = 13 + }, + .aad = { + .data = ccm_aad_test_4, + .len = 32 + }, + .plaintext = { + .data = { + 0xF5, 0x2A, 0xEC, 0xEF, 0x88, 0x73, 0x10, 0x11, + 0xD0, 0x68, 0xCB, 0x8E, 0x82, 0xBE, 0xED, 0x68, + 0xC1, 0xC6, 0xF2, 0xA5, 0x9E, 0x99, 0xF6, 0x3D, + 0xDA, 0xEA, 0xA0, 0x49, 0xD0, 0xF4, 0x18, 0xC0 + }, + .len = 32 + }, + .ciphertext = { + .data = { + 0x0F, 0x7B, 0x8A, 0x0D, 0x79, 0xB6, 0x1C, 0x93, + 0x00, 0x26, 0x57, 0x52, 0xC8, 0x68, 0x30, 0xF6, + 0x92, 0xA4, 0x9C, 0x86, 0x5E, 0x77, 0x51, 0x9C, + 0x79, 0x15, 0x7C, 0xF3, 0x87, 0x27, 0x2A, 0x77 + }, + 
.len = 32 + }, + .auth_tag = { + .data = { + 0x64, 0x2F, 0x58, 0x61 + }, + .len = 4 + } +}; + /** AES-CCM-192 Test Vectors */ static const struct aead_test_data ccm_test_case_192_1 = { .algo = RTE_CRYPTO_AEAD_AES_CCM, From 29a4d5307f1ce28f6c591c7eba2afc786e2fd089 Mon Sep 17 00:00:00 2001 From: David Marchand Date: Wed, 12 Nov 2025 10:10:44 +0100 Subject: [PATCH 51/99] crypto/dpaa2_sec: remove cryptodev pointer from bus device Calling rte_cryptodev_pmd_get_named_dev() is enough to retrieve the crypto device object. This leaves no user of the cryptodev field in the fslmc device object. Signed-off-by: David Marchand Tested-by: Hemant Agrawal --- drivers/bus/fslmc/bus_fslmc_driver.h | 1 - drivers/crypto/dpaa2_sec/dpaa2_sec_dpseci.c | 8 +++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/bus/fslmc/bus_fslmc_driver.h b/drivers/bus/fslmc/bus_fslmc_driver.h index b6a6238a01b..45c7a519f90 100644 --- a/drivers/bus/fslmc/bus_fslmc_driver.h +++ b/drivers/bus/fslmc/bus_fslmc_driver.h @@ -99,7 +99,6 @@ struct rte_dpaa2_device { struct rte_device device; /**< Inherit core device */ union { struct rte_eth_dev *eth_dev; /**< ethernet device */ - struct rte_cryptodev *cryptodev; /**< Crypto Device */ struct rte_dma_dev *dmadev; /**< DMA Device */ struct rte_rawdev *rawdev; /**< Raw Device */ }; diff --git a/drivers/crypto/dpaa2_sec/dpaa2_sec_dpseci.c b/drivers/crypto/dpaa2_sec/dpaa2_sec_dpseci.c index ca10d88da7e..698548e6ead 100644 --- a/drivers/crypto/dpaa2_sec/dpaa2_sec_dpseci.c +++ b/drivers/crypto/dpaa2_sec/dpaa2_sec_dpseci.c @@ -4503,8 +4503,6 @@ cryptodev_dpaa2_sec_probe(struct rte_dpaa2_driver *dpaa2_drv __rte_unused, return -ENOMEM; } - dpaa2_dev->cryptodev = cryptodev; - if (dpaa2_svr_family == SVR_LX2160A) rta_set_sec_era(RTA_SEC_ERA_10); else @@ -4526,10 +4524,14 @@ cryptodev_dpaa2_sec_probe(struct rte_dpaa2_driver *dpaa2_drv __rte_unused, static int cryptodev_dpaa2_sec_remove(struct rte_dpaa2_device *dpaa2_dev) { + char cryptodev_name[RTE_CRYPTODEV_NAME_MAX_LEN]; struct rte_cryptodev *cryptodev; int ret; - cryptodev = dpaa2_dev->cryptodev; + snprintf(cryptodev_name, sizeof(cryptodev_name), "dpsec-%d", + dpaa2_dev->object_id); + + cryptodev = rte_cryptodev_pmd_get_named_dev(cryptodev_name); if (cryptodev == NULL) return -ENODEV; From 868a3abd0ee5f10af6d0b9ee6b547b4c1405d723 Mon Sep 17 00:00:00 2001 From: David Marchand Date: Wed, 12 Nov 2025 10:10:45 +0100 Subject: [PATCH 52/99] dma/dpaa2: remove dmadev pointer from bus device A driver .dev_close op is automatically called when releasing a dma device. Move device specific unitialisation in this driver .dev_close op. This leaves no user of the dmadev field in the fslmc device object. 
Signed-off-by: David Marchand Tested-by: Hemant Agrawal --- drivers/bus/fslmc/bus_fslmc_driver.h | 1 - drivers/dma/dpaa2/dpaa2_qdma.c | 9 +++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/bus/fslmc/bus_fslmc_driver.h b/drivers/bus/fslmc/bus_fslmc_driver.h index 45c7a519f90..efa88754a7c 100644 --- a/drivers/bus/fslmc/bus_fslmc_driver.h +++ b/drivers/bus/fslmc/bus_fslmc_driver.h @@ -99,7 +99,6 @@ struct rte_dpaa2_device { struct rte_device device; /**< Inherit core device */ union { struct rte_eth_dev *eth_dev; /**< ethernet device */ - struct rte_dma_dev *dmadev; /**< DMA Device */ struct rte_rawdev *rawdev; /**< Raw Device */ }; enum rte_dpaa2_dev_type dev_type; /**< Device Type */ diff --git a/drivers/dma/dpaa2/dpaa2_qdma.c b/drivers/dma/dpaa2/dpaa2_qdma.c index 4be43d6bd99..beca464c720 100644 --- a/drivers/dma/dpaa2/dpaa2_qdma.c +++ b/drivers/dma/dpaa2/dpaa2_qdma.c @@ -1455,6 +1455,9 @@ dpaa2_qdma_stop(struct rte_dma_dev *dev) return 0; } +static int +dpaa2_dpdmai_dev_uninit(struct rte_dma_dev *dev); + static int dpaa2_qdma_close(struct rte_dma_dev *dev) { @@ -1505,6 +1508,8 @@ dpaa2_qdma_close(struct rte_dma_dev *dev) /* Reset QDMA device structure */ qdma_dev->num_vqs = 0; + dpaa2_dpdmai_dev_uninit(dev); + return 0; } @@ -1703,7 +1708,6 @@ dpaa2_qdma_probe(struct rte_dpaa2_driver *dpaa2_drv, return -EINVAL; } - dpaa2_dev->dmadev = dmadev; dmadev->dev_ops = &dpaa2_qdma_ops; dmadev->device = &dpaa2_dev->device; dmadev->fp_obj->dev_private = dmadev->data->dev_private; @@ -1727,13 +1731,10 @@ dpaa2_qdma_probe(struct rte_dpaa2_driver *dpaa2_drv, static int dpaa2_qdma_remove(struct rte_dpaa2_device *dpaa2_dev) { - struct rte_dma_dev *dmadev = dpaa2_dev->dmadev; int ret; DPAA2_QDMA_FUNC_TRACE(); - dpaa2_dpdmai_dev_uninit(dmadev); - ret = rte_dma_pmd_release(dpaa2_dev->device.name); if (ret) DPAA2_QDMA_ERR("Device cleanup failed"); From 603caa201d7fea95d9272aec5667cb4a23d0886d Mon Sep 17 00:00:00 2001 From: David Marchand Date: Wed, 12 Nov 2025 10:10:46 +0100 Subject: [PATCH 53/99] net/dpaa2: remove ethdev pointer from bus device This code was never used. This leaves no user of the ethdev field in the fslmc device object. Fixes: f023d059769f ("net/dpaa2: support recycle loopback port") Cc: stable@dpdk.org Signed-off-by: David Marchand Tested-by: Hemant Agrawal --- drivers/bus/fslmc/bus_fslmc_driver.h | 1 - drivers/net/dpaa2/dpaa2_ethdev.c | 1 - drivers/net/dpaa2/dpaa2_ethdev.h | 6 ---- drivers/net/dpaa2/dpaa2_recycle.c | 50 ---------------------------- 4 files changed, 58 deletions(-) diff --git a/drivers/bus/fslmc/bus_fslmc_driver.h b/drivers/bus/fslmc/bus_fslmc_driver.h index efa88754a7c..094f885f595 100644 --- a/drivers/bus/fslmc/bus_fslmc_driver.h +++ b/drivers/bus/fslmc/bus_fslmc_driver.h @@ -98,7 +98,6 @@ struct rte_dpaa2_device { TAILQ_ENTRY(rte_dpaa2_device) next; /**< Next probed DPAA2 device. 
*/ struct rte_device device; /**< Inherit core device */ union { - struct rte_eth_dev *eth_dev; /**< ethernet device */ struct rte_rawdev *rawdev; /**< Raw Device */ }; enum rte_dpaa2_dev_type dev_type; /**< Device Type */ diff --git a/drivers/net/dpaa2/dpaa2_ethdev.c b/drivers/net/dpaa2/dpaa2_ethdev.c index 1dd4a1e32b0..fdf3e0f2207 100644 --- a/drivers/net/dpaa2/dpaa2_ethdev.c +++ b/drivers/net/dpaa2/dpaa2_ethdev.c @@ -3351,7 +3351,6 @@ rte_dpaa2_probe(struct rte_dpaa2_driver *dpaa2_drv, eth_dev->device = &dpaa2_dev->device; - dpaa2_dev->eth_dev = eth_dev; eth_dev->data->rx_mbuf_alloc_failed = 0; if (dpaa2_drv->drv_flags & RTE_DPAA2_DRV_INTR_LSC) diff --git a/drivers/net/dpaa2/dpaa2_ethdev.h b/drivers/net/dpaa2/dpaa2_ethdev.h index 87a94bc15bc..86b3022ddb6 100644 --- a/drivers/net/dpaa2/dpaa2_ethdev.h +++ b/drivers/net/dpaa2/dpaa2_ethdev.h @@ -531,12 +531,6 @@ int dpaa2_dev_recycle_config(struct rte_eth_dev *eth_dev); int dpaa2_dev_recycle_deconfig(struct rte_eth_dev *eth_dev); int dpaa2_soft_parser_loaded(void); -int dpaa2_dev_recycle_qp_setup(struct rte_dpaa2_device *dpaa2_dev, - uint16_t qidx, uint64_t cntx, - eth_rx_burst_t tx_lpbk, eth_tx_burst_t rx_lpbk, - struct dpaa2_queue **txq, - struct dpaa2_queue **rxq); - void dpaa2_dev_mac_setup_stats(struct rte_eth_dev *dev); diff --git a/drivers/net/dpaa2/dpaa2_recycle.c b/drivers/net/dpaa2/dpaa2_recycle.c index 94a7e2a020a..d1e21dd4d17 100644 --- a/drivers/net/dpaa2/dpaa2_recycle.c +++ b/drivers/net/dpaa2/dpaa2_recycle.c @@ -730,53 +730,3 @@ dpaa2_dev_recycle_deconfig(struct rte_eth_dev *eth_dev) return ret; } - -int -dpaa2_dev_recycle_qp_setup(struct rte_dpaa2_device *dpaa2_dev, - uint16_t qidx, uint64_t cntx, - eth_rx_burst_t tx_lpbk, eth_tx_burst_t rx_lpbk, - struct dpaa2_queue **txq, - struct dpaa2_queue **rxq) -{ - struct rte_eth_dev *dev; - struct rte_eth_dev_data *data; - struct dpaa2_queue *txq_tmp; - struct dpaa2_queue *rxq_tmp; - struct dpaa2_dev_priv *priv; - - dev = dpaa2_dev->eth_dev; - data = dev->data; - priv = data->dev_private; - - if (!(priv->flags & DPAA2_TX_LOOPBACK_MODE) && - (tx_lpbk || rx_lpbk)) { - DPAA2_PMD_ERR("%s is NOT recycle device!", data->name); - - return -EINVAL; - } - - if (qidx >= data->nb_rx_queues || qidx >= data->nb_tx_queues) - return -EINVAL; - - rte_spinlock_lock(&priv->lpbk_qp_lock); - - if (tx_lpbk) - dev->tx_pkt_burst = tx_lpbk; - - if (rx_lpbk) - dev->rx_pkt_burst = rx_lpbk; - - txq_tmp = data->tx_queues[qidx]; - txq_tmp->lpbk_cntx = cntx; - rxq_tmp = data->rx_queues[qidx]; - rxq_tmp->lpbk_cntx = cntx; - - if (txq) - *txq = txq_tmp; - if (rxq) - *rxq = rxq_tmp; - - rte_spinlock_unlock(&priv->lpbk_qp_lock); - - return 0; -} From 2db9ce15baa0e1eb6b0e254c7196c4c63cdc101f Mon Sep 17 00:00:00 2001 From: David Marchand Date: Wed, 12 Nov 2025 10:10:47 +0100 Subject: [PATCH 54/99] bus/fslmc: remove rawdev pointer There is no user of this field. Signed-off-by: David Marchand Tested-by: Hemant Agrawal --- drivers/bus/fslmc/bus_fslmc_driver.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/bus/fslmc/bus_fslmc_driver.h b/drivers/bus/fslmc/bus_fslmc_driver.h index 094f885f595..89abc3c486e 100644 --- a/drivers/bus/fslmc/bus_fslmc_driver.h +++ b/drivers/bus/fslmc/bus_fslmc_driver.h @@ -97,9 +97,6 @@ enum rte_dpaa2_dev_type { struct rte_dpaa2_device { TAILQ_ENTRY(rte_dpaa2_device) next; /**< Next probed DPAA2 device. 
*/ struct rte_device device; /**< Inherit core device */ - union { - struct rte_rawdev *rawdev; /**< Raw Device */ - }; enum rte_dpaa2_dev_type dev_type; /**< Device Type */ uint16_t object_id; /**< DPAA2 Object ID */ enum rte_dpaa2_dev_type ep_dev_type; /**< Endpoint Device Type */ From 2f3b51da38b20e09e890cdddb9a0caf05c7fb716 Mon Sep 17 00:00:00 2001 From: Doug Foster Date: Tue, 11 Nov 2025 19:06:38 +0000 Subject: [PATCH 55/99] config/arm: update Cobalt-100 SoC Update the build configuration for the Microsoft Azure Cobalt 100 SoC to use a CPU-specific mcpu value supported by GCC 14+. Signed-off-by: Doug Foster Reviewed-by: Wathsala Vithanage --- config/arm/meson.build | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/config/arm/meson.build b/config/arm/meson.build index c0aa21b57d5..523b0fc0ed5 100644 --- a/config/arm/meson.build +++ b/config/arm/meson.build @@ -300,7 +300,7 @@ implementer_microsoft = { 'flags': [], 'part_number_config': { '0xd49': { - 'mcpu': 'mcpu_cobalt100', + 'mcpu': 'cobalt-100', 'flags': [ ['RTE_MACHINE', '"neoverse-n2"'], ['RTE_ARM_FEATURE_ATOMICS', true], @@ -492,6 +492,7 @@ soc_cobalt100 = { 'description' : 'Microsoft Azure Cobalt 100', 'implementer' : '0x6d', 'part_number': '0xd49', + 'extra_march_features': ['crypto'], 'numa': true } @@ -703,10 +704,6 @@ mcpu_defs = { 'march': 'armv8-a', 'march_extensions': ['crc'] }, - 'mcpu_cobalt100': { - 'march': 'armv9-a', - 'march_extensions': ['crypto', 'sve', 'sve2'] - }, 'mcpu_cortex-a78ae': { 'march': 'armv8.4-a', 'march_extensions': ['crypto'] From a0b148048b3d9960788093b3b94c70af8f04136b Mon Sep 17 00:00:00 2001 From: Bing Zhao Date: Mon, 17 Nov 2025 09:28:58 +0200 Subject: [PATCH 56/99] app/flow-perf: fix rules array length The array used to save the flow rule pointers was allocated with an incorrect length. One extra rule slot should have been appended, not one extra byte.
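
To make the off-by-one explicit, compare the two sizes passed to rte_zmalloc() (taken from the change below):

```
/* Before: room for N pointers plus one stray byte. */
(sizeof(struct rte_flow *) * rules_count_per_core) + 1
/* After: room for N + 1 pointers, i.e. one extra rule slot. */
sizeof(struct rte_flow *) * (rules_count_per_core + 1)
```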
Fixes: 070316d01d3e ("app/flow-perf: add multi-core rule insertion and deletion") Cc: stable@dpdk.org Signed-off-by: Bing Zhao Reviewed-by: Dariusz Sosnowski Acked-by: Wisam Jaddo --- app/test-flow-perf/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/test-flow-perf/main.c b/app/test-flow-perf/main.c index b2084b93ddd..a8876acf1f9 100644 --- a/app/test-flow-perf/main.c +++ b/app/test-flow-perf/main.c @@ -1487,7 +1487,7 @@ insert_flows(int port_id, uint8_t core_id, uint16_t dst_port_id) global_actions[0] = FLOW_ITEM_MASK(RTE_FLOW_ACTION_TYPE_JUMP); flows_list = rte_zmalloc("flows_list", - (sizeof(struct rte_flow *) * rules_count_per_core) + 1, 0); + (sizeof(struct rte_flow *) * (rules_count_per_core + 1)), 0); if (flows_list == NULL) rte_exit(EXIT_FAILURE, "No Memory available!\n"); From 1adf5b841c520959be1d1e5af4a08cc929f4f314 Mon Sep 17 00:00:00 2001 From: Jiawei Wang Date: Tue, 30 Sep 2025 15:02:47 +0300 Subject: [PATCH 57/99] doc: add multi-host LAG probing in mlx5 guide On a ConnectX-7 multi-host setup, the PF index is not continuous, so users need to query the PF index from the output of the related sysfs entries: cat /sys/class/net/*/phys_port_name If the example output is 0 and 2 for PF1 and PF2, then use 0 and 2 as the PF indices: -a ,representor=pf[0,2]vf[0-2] Signed-off-by: Jiawei Wang Acked-by: Dariusz Sosnowski --- doc/guides/nics/mlx5.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst index 84bca44d6c6..32851d1327f 100644 --- a/doc/guides/nics/mlx5.rst +++ b/doc/guides/nics/mlx5.rst @@ -745,6 +745,16 @@ for an additional list of options shared with other mlx5 drivers. ,representor=pf[0,1]vf[0-2] + On a ConnectX-7 multi-host setup, the PF index is not continuous, + and must be queried in sysfs:: + + cat /sys/class/net/*/phys_port_name + + With an example output of 0 and 2 for PF1 and PF2, use [0,2] as the PF index + to probe VF port representors 0 through 2 on both PFs of the bonding device:: + + ,representor=pf[0,2]vf[0-2] + - ``repr_matching_en`` parameter [int] - 0. If representor matching is disabled, then there will be no implicit From 53fdc237df139bd439293b6dcfa09b9bf9b791b4 Mon Sep 17 00:00:00 2001 From: Maayan Kashani Date: Thu, 6 Nov 2025 10:38:16 +0200 Subject: [PATCH 58/99] common/mlx5: read SW steering capability bits SWS will be disabled in future HW generations. This patch checks the SWS capability bits; returning a relevant error if the user configuration is not supported will be added in a follow-up commit.
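
The per-domain capability check added here boils down to the following predicate (a condensed sketch, not the exact driver code):

```
/* SW steering is usable on a domain if either the new (v2) or the
 * legacy sw_owner capability bit is reported by the device. */
static inline bool
sws_domain_supported(uint8_t sw_owner, uint8_t sw_owner_v2)
{
	return sw_owner_v2 || sw_owner;
}
```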
Signed-off-by: Maayan Kashani Acked-by: Dariusz Sosnowski --- drivers/common/mlx5/mlx5_devx_cmds.c | 18 ++++++++++++++++++ drivers/common/mlx5/mlx5_devx_cmds.h | 6 ++++++ drivers/common/mlx5/mlx5_prm.h | 14 ++++++++++---- 3 files changed, 34 insertions(+), 4 deletions(-) diff --git a/drivers/common/mlx5/mlx5_devx_cmds.c b/drivers/common/mlx5/mlx5_devx_cmds.c index 385759230a2..22f6b29089e 100644 --- a/drivers/common/mlx5/mlx5_devx_cmds.c +++ b/drivers/common/mlx5/mlx5_devx_cmds.c @@ -1300,6 +1300,18 @@ mlx5_devx_cmd_query_hca_attr(void *ctx, rx_reg |= ((0xff & reg_c_8_15) << 8); attr->set_reg_c &= (rx_reg & tx_reg); + attr->rx_sw_owner_v2 = MLX5_GET(flow_table_nic_cap, hcattr, + flow_table_properties_nic_receive.sw_owner_v2); + if (!attr->rx_sw_owner_v2) + attr->rx_sw_owner = MLX5_GET(flow_table_nic_cap, hcattr, + flow_table_properties_nic_receive.sw_owner); + + attr->tx_sw_owner_v2 = MLX5_GET(flow_table_nic_cap, hcattr, + flow_table_properties_nic_transmit.sw_owner_v2); + if (!attr->tx_sw_owner_v2) + attr->tx_sw_owner = MLX5_GET(flow_table_nic_cap, hcattr, + flow_table_properties_nic_transmit.sw_owner); + #undef GET_RX_REG_X_BITS #undef GET_TX_REG_X_BITS } @@ -1452,6 +1464,12 @@ mlx5_devx_cmd_query_hca_attr(void *ctx, reg_c_8_15 = MLX5_GET(flow_table_esw_cap, hcattr, ft_field_support_2_esw_fdb.metadata_reg_c_8_15); attr->set_reg_c &= ((0xff & reg_c_8_15) << 8) | esw_reg; + + attr->esw_sw_owner_v2 = MLX5_GET(flow_table_esw_cap, hcattr, + flow_table_properties_nic_esw_fdb.sw_owner_v2); + if (!attr->esw_sw_owner_v2) + attr->esw_sw_owner = MLX5_GET(flow_table_esw_cap, hcattr, + flow_table_properties_nic_esw_fdb.sw_owner); } return 0; error: diff --git a/drivers/common/mlx5/mlx5_devx_cmds.h b/drivers/common/mlx5/mlx5_devx_cmds.h index efae6826dcf..4c7747cbeca 100644 --- a/drivers/common/mlx5/mlx5_devx_cmds.h +++ b/drivers/common/mlx5/mlx5_devx_cmds.h @@ -336,6 +336,12 @@ struct mlx5_hca_attr { uint8_t max_header_modify_pattern_length; uint64_t system_image_guid; uint32_t log_max_conn_track_offload:5; + uint8_t rx_sw_owner:1; + uint8_t rx_sw_owner_v2:1; + uint8_t tx_sw_owner:1; + uint8_t tx_sw_owner_v2:1; + uint8_t esw_sw_owner:1; + uint8_t esw_sw_owner_v2:1; }; /* LAG Context. */ diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h index 5db8d67cfc1..6cde3f8f1a1 100644 --- a/drivers/common/mlx5/mlx5_prm.h +++ b/drivers/common/mlx5/mlx5_prm.h @@ -1593,9 +1593,13 @@ enum { #define MLX5_HCA_FLEX_GTPU_DW_0_ENABLED (1UL << 18) #define MLX5_HCA_FLEX_GTPU_TEID_ENABLED (1UL << 19) -/* The device steering logic format. */ -#define MLX5_STEERING_LOGIC_FORMAT_CONNECTX_5 0x0 -#define MLX5_STEERING_LOGIC_FORMAT_CONNECTX_6DX 0x1 +/* The device steering logic format version. 
*/ +enum { + MLX5_STEERING_LOGIC_FORMAT_CONNECTX_5 = 0, + MLX5_STEERING_LOGIC_FORMAT_CONNECTX_6DX = 1, + MLX5_STEERING_LOGIC_FORMAT_CONNECTX_7 = 2, + MLX5_STEERING_LOGIC_FORMAT_CONNECTX_8 = 3, +}; struct mlx5_ifc_cmd_hca_cap_bits { u8 access_other_hca_roce[0x1]; @@ -2342,7 +2346,9 @@ struct mlx5_ifc_flow_table_nic_cap_bits { }; struct mlx5_ifc_flow_table_esw_cap_bits { - u8 reserved_at_0[0x800]; + u8 reserved_at_0[0x200]; + struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_esw_fdb; + u8 reserved_at_400[0x400]; struct mlx5_ifc_ft_fields_support_bits ft_header_modify_esw_fdb; u8 reserved_at_C00[0x800]; struct mlx5_ifc_ft_fields_support_2_bits From d1ac7b6c64d987006891fcf35a13c05cf2b05d62 Mon Sep 17 00:00:00 2001 From: Maayan Kashani Date: Thu, 6 Nov 2025 10:38:17 +0200 Subject: [PATCH 59/99] net/mlx5: update flow devargs handling for future HW SWS (software steering) will be disabled on future hardware generations. Update the defaults for the dv_flow_en and allow_duplicate_pattern devargs accordingly. - Default dv_flow_en devarg value will be chosen based on whether NIC supports SW steering and/or HW steering. - If DV flow is not supported and allow_duplicate_pattern is set by the user, forcibly disable it and emit a clear log message. This change improves reliability by ensuring only valid configurations are applied, and provides clear feedback to the user when fallbacks are triggered. Signed-off-by: Maayan Kashani Acked-by: Dariusz Sosnowski --- doc/guides/nics/mlx5.rst | 12 +++++-- drivers/net/mlx5/mlx5.c | 71 ++++++++++++++++++++++++++++++++++++++-- 2 files changed, 78 insertions(+), 5 deletions(-) diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst index 32851d1327f..84dfbdb61cb 100644 --- a/doc/guides/nics/mlx5.rst +++ b/doc/guides/nics/mlx5.rst @@ -694,8 +694,11 @@ for an additional list of options shared with other mlx5 drivers. Value 2 enables the WQE based hardware steering. In this mode, only queue-based flow management is supported. - It is configured by default to 1 (DV flow steering) if supported. - Otherwise, the value is 0 which indicates legacy Verbs flow offloading. + By default, the PMD will set this value according to capability. + If DV flow steering is supported, it will be set to 1. + If DV flow steering is not supported and HW steering is supported, + then it will be set to 2. + Otherwise, it will be set to 0. - ``dv_esw_en`` parameter [int] @@ -849,7 +852,10 @@ for an additional list of options shared with other mlx5 drivers. In this case, all rules are inserted but only the first rule takes effect, the next rule takes effect only if the previous rules are deleted. - By default, the PMD will set this value to 1. + This option is not supported in :ref:`HW steering `, + and will be forced to 0 in this mode. + + By default, the PMD will set this value according to capability. .. 
_mlx5_net_stats: diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index 666216c21a0..0c0a6343bb3 100644 --- a/drivers/net/mlx5/mlx5.c +++ b/drivers/net/mlx5/mlx5.c @@ -1440,6 +1440,45 @@ mlx5_dev_args_check_handler(const char *key, const char *val, void *opaque) return 0; } +static bool +mlx5_hws_is_supported(struct mlx5_dev_ctx_shared *sh) +{ + return (sh->cdev->config.devx && + sh->cdev->config.hca_attr.wqe_based_flow_table_sup); +} + +static bool +mlx5_sws_is_any_supported(struct mlx5_dev_ctx_shared *sh) +{ + struct mlx5_common_device *cdev = sh->cdev; + struct mlx5_hca_attr *hca_attr = &cdev->config.hca_attr; + + if (hca_attr->rx_sw_owner_v2 || hca_attr->rx_sw_owner) + return true; + + if (hca_attr->tx_sw_owner_v2 || hca_attr->tx_sw_owner) + return true; + + if (hca_attr->eswitch_manager && (hca_attr->esw_sw_owner_v2 || hca_attr->esw_sw_owner)) + return true; + + return false; +} + +static bool +mlx5_kvargs_is_used(struct mlx5_kvargs_ctrl *mkvlist, const char *key) +{ + const struct rte_kvargs_pair *pair; + uint32_t i; + + for (i = 0; i < mkvlist->kvlist->count; ++i) { + pair = &mkvlist->kvlist->pairs[i]; + if (strcmp(pair->key, key) == 0 && mkvlist->is_used[i]) + return true; + } + return false; +} + /** * Parse user device parameters and adjust them according to device * capabilities. @@ -1481,6 +1520,8 @@ mlx5_shared_dev_ctx_args_config(struct mlx5_dev_ctx_shared *sh, int ret = 0; size_t alignment = rte_mem_page_size(); uint32_t max_queue_umem_size = MLX5_WQE_SIZE * mlx5_dev_get_max_wq_size(sh); + bool hws_is_supported = mlx5_hws_is_supported(sh); + bool sws_is_supported = mlx5_sws_is_any_supported(sh); if (alignment == (size_t)-1) { alignment = (1 << MLX5_LOG_PAGE_SIZE); @@ -1491,9 +1532,15 @@ mlx5_shared_dev_ctx_args_config(struct mlx5_dev_ctx_shared *sh, memset(config, 0, sizeof(*config)); config->vf_nl_en = 1; config->dv_esw_en = 1; - config->dv_flow_en = 1; + if (!sws_is_supported && hws_is_supported) + config->dv_flow_en = 2; + else + config->dv_flow_en = 1; config->decap_en = 1; - config->allow_duplicate_pattern = 1; + if (config->dv_flow_en == 2) + config->allow_duplicate_pattern = 0; + else + config->allow_duplicate_pattern = 1; config->fdb_def_rule = 1; config->cnt_svc.cycle_time = MLX5_CNT_SVC_CYCLE_TIME_DEFAULT; config->cnt_svc.service_core = rte_get_main_lcore(); @@ -1514,6 +1561,26 @@ mlx5_shared_dev_ctx_args_config(struct mlx5_dev_ctx_shared *sh, DRV_LOG(WARNING, "DV flow is not supported."); config->dv_flow_en = 0; } + /* Inform user if DV flow is not supported. */ + if (config->dv_flow_en == 1 && !sws_is_supported && hws_is_supported) { + DRV_LOG(WARNING, "DV flow is not supported. Changing to HWS mode."); + config->dv_flow_en = 2; + } + /* Handle allow_duplicate_pattern based on final dv_flow_en mode. + * HWS mode (dv_flow_en=2) doesn't support duplicate patterns. + * Warn only if user explicitly requested an incompatible setting. + */ + bool allow_dup_pattern_set = mkvlist != NULL && + mlx5_kvargs_is_used(mkvlist, MLX5_ALLOW_DUPLICATE_PATTERN); + if (config->dv_flow_en == 2) { + if (config->allow_duplicate_pattern == 1 && allow_dup_pattern_set) + DRV_LOG(WARNING, "Duplicate pattern is not supported with HWS. 
Disabling it."); + config->allow_duplicate_pattern = 0; + } else if (!allow_dup_pattern_set) { + /* Non-HWS mode: set default to 1 only if not explicitly set by user */ + config->allow_duplicate_pattern = 1; + } + if (config->dv_esw_en && !sh->dev_cap.dv_esw_en) { DRV_LOG(DEBUG, "E-Switch DV flow is not supported."); config->dv_esw_en = 0; From ae57af82d5811b56cf734ac3893849fe467382b0 Mon Sep 17 00:00:00 2001 From: Gregory Etelson Date: Thu, 6 Nov 2025 15:02:28 +0200 Subject: [PATCH 60/99] net/mlx5: fix sync HWS flow sample action validation The patch validates that sample actions include terminal action. Fixes: d986f04d6529 ("net/mlx5: add functions for non-template flow sample") Signed-off-by: Gregory Etelson Acked-by: Dariusz Sosnowski --- drivers/net/mlx5/mlx5_flow.h | 3 +++ drivers/net/mlx5/mlx5_flow_hw.c | 4 --- drivers/net/mlx5/mlx5_nta_sample.c | 43 +++++++++++++++++++++++++++++- 3 files changed, 45 insertions(+), 5 deletions(-) diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h index e8b298dd1df..26d9e80a13a 100644 --- a/drivers/net/mlx5/mlx5_flow.h +++ b/drivers/net/mlx5/mlx5_flow.h @@ -21,6 +21,9 @@ #include "hws/mlx5dr.h" #include "mlx5_tx.h" +#define MLX5_HW_PORT_IS_PROXY(priv) \ + (!!((priv)->sh->esw_mode && (priv)->master)) + /* E-Switch Manager port, used for rte_flow_item_port_id. */ #define MLX5_PORT_ESW_MGR UINT32_MAX diff --git a/drivers/net/mlx5/mlx5_flow_hw.c b/drivers/net/mlx5/mlx5_flow_hw.c index 208f50fbfde..de400536c7d 100644 --- a/drivers/net/mlx5/mlx5_flow_hw.c +++ b/drivers/net/mlx5/mlx5_flow_hw.c @@ -62,10 +62,6 @@ static struct rte_flow_fp_ops mlx5_flow_hw_fp_ops; #define MLX5_HW_VLAN_PUSH_VID_IDX 1 #define MLX5_HW_VLAN_PUSH_PCP_IDX 2 -#define MLX5_HW_PORT_IS_PROXY(priv) \ - (!!((priv)->sh->esw_mode && (priv)->master)) - - struct mlx5_indlst_legacy { struct mlx5_indirect_list indirect; struct rte_flow_action_handle *handle; diff --git a/drivers/net/mlx5/mlx5_nta_sample.c b/drivers/net/mlx5/mlx5_nta_sample.c index 938108cf4c0..0b7b3d0c8ee 100644 --- a/drivers/net/mlx5/mlx5_nta_sample.c +++ b/drivers/net/mlx5/mlx5_nta_sample.c @@ -525,6 +525,42 @@ validate_prefix_actions(const struct rte_flow_action *actions) return i < MLX5_HW_MAX_ACTS - 1; } +static bool +validate_sample_terminal_actions(const struct rte_eth_dev *dev, + const struct rte_flow_attr *flow_attr, + const struct rte_flow_action *actions) +{ + uint32_t i; + const struct mlx5_priv *priv = dev->data->dev_private; + const struct rte_flow_action_ethdev *port = NULL; + bool is_proxy = MLX5_HW_PORT_IS_PROXY(priv); + const struct rte_flow_action *a = NULL; + + for (i = 0; actions[i].type != RTE_FLOW_ACTION_TYPE_END; i++) { + if (actions[i].type != RTE_FLOW_ACTION_TYPE_VOID) + a = &actions[i]; + } + if (a == NULL) + return false; + switch (a->type) { + case RTE_FLOW_ACTION_TYPE_JUMP: + case RTE_FLOW_ACTION_TYPE_QUEUE: + case RTE_FLOW_ACTION_TYPE_DROP: + case RTE_FLOW_ACTION_TYPE_REPRESENTED_PORT: + return true; + case RTE_FLOW_ACTION_TYPE_PORT_REPRESENTOR: + if (!is_proxy || !flow_attr->transfer) + return false; + port = a->conf; + if (!port || port->port_id != MLX5_REPRESENTED_PORT_ESW_MGR) + return false; + return true; + default: + break; + } + return false; +} + static void action_append(struct rte_flow_action *actions, const struct rte_flow_action *last) { @@ -829,10 +865,15 @@ mlx5_nta_sample_flow_list_create(struct rte_eth_dev *dev, } mlx5_nta_parse_sample_actions(actions, &sample, prefix_actions, suffix_actions); if (!validate_prefix_actions(prefix_actions)) { - 
rte_flow_error_set(error, -EINVAL, RTE_FLOW_ERROR_TYPE_ACTION, + rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION, NULL, "Too many actions"); return NULL; } + if (!validate_sample_terminal_actions(dev, attr, sample)) { + rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION, + NULL, "Invalid sample actions"); + return NULL; + } sample_conf = (const struct rte_flow_action_sample *)sample->conf; sample_actions = (struct rte_flow_action *)(uintptr_t)sample_conf->actions; mirror_entry = mlx5_create_nta_mirror(dev, attr, sample_actions, From 750f635fc6a7ee287e076c5500ca97d77187676a Mon Sep 17 00:00:00 2001 From: Sivaprasad Tummala Date: Tue, 11 Nov 2025 03:40:57 +0000 Subject: [PATCH 61/99] net/mlx5: fix spurious CPU wakeups Previously, the PMD used a common monitor callback to determine CQE ownership for power-aware polling. However, when a CQE contained an invalid opcode (MLX5_CQE_INVALID), the ownership bit was not reliable. As a result, the monitor condition could falsely indicate CQE availability and cause the CPU to wake up unnecessarily during low traffic periods. This resulted in spurious wakeups in monitor-wait mode and reduced the expected power savings, as cores exited the sleep state even when no valid CQEs were available. This patch introduces a dedicated callback that skips invalid CQEs and optimizes power efficiency by preventing false wakeups caused by hardware-owned or invalid entries. Fixes: a8f0df6bf98d ("net/mlx5: support power monitoring") Cc: stable@dpdk.org Signed-off-by: Sivaprasad Tummala --- drivers/net/mlx5/mlx5_rx.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/net/mlx5/mlx5_rx.c b/drivers/net/mlx5/mlx5_rx.c index 420a03068d5..ac663a978ee 100644 --- a/drivers/net/mlx5/mlx5_rx.c +++ b/drivers/net/mlx5/mlx5_rx.c @@ -295,6 +295,20 @@ mlx5_monitor_callback(const uint64_t value, return (value & m) == v ? -1 : 0; } +static int +mlx5_monitor_cqe_own_callback(const uint64_t value, + const uint64_t opaque[RTE_POWER_MONITOR_OPAQUE_SZ]) +{ + const uint64_t m = opaque[CLB_MSK_IDX]; + const uint64_t v = opaque[CLB_VAL_IDX]; + const uint64_t sw_owned = ((value & m) == v); + const uint64_t opcode = MLX5_CQE_OPCODE(value); + const uint64_t valid_op = (opcode != MLX5_CQE_INVALID); + + /* ownership bit is not valid for invalid opcode; CQE is HW owned */ + return -(valid_op & sw_owned); +} + int mlx5_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc) { struct mlx5_rxq_data *rxq = rx_queue; @@ -312,12 +326,13 @@ int mlx5_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc) pmc->addr = &cqe->validity_iteration_count; pmc->opaque[CLB_VAL_IDX] = vic; pmc->opaque[CLB_MSK_IDX] = MLX5_CQE_VIC_INIT; + pmc->fn = mlx5_monitor_callback; } else { pmc->addr = &cqe->op_own; pmc->opaque[CLB_VAL_IDX] = !!idx; pmc->opaque[CLB_MSK_IDX] = MLX5_CQE_OWNER_MASK; + pmc->fn = mlx5_monitor_cqe_own_callback; } - pmc->fn = mlx5_monitor_callback; pmc->size = sizeof(uint8_t); return 0; } From 15501c4298ea029b4252b8194daa574d4e02df20 Mon Sep 17 00:00:00 2001 From: Gavin Li Date: Wed, 12 Nov 2025 15:42:36 +0200 Subject: [PATCH 62/99] net/mlx5: fix IPv6 DSCP offset in HWS sync API The processing of field IPv6 DSCP via the synchronous flow API is different between software and hardware steering engines (HWS). The bit shift required for IPv6 DSCP was not applied in HWS, resulting in incorrect data in the field. To resolve this, the bit shift handling is added to HWS. 
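For context, the IPv6 traffic class byte packs DSCP into its upper 6 bits and ECN into the lower 2 bits, so a raw DSCP value has to be shifted before it is written, which is what the hunk below does via MLX5_IPV6_HDR_DSCP_SHIFT. A minimal standalone sketch of that packing (local constant names, assuming the standard RFC 2474 layout):

```
#include <stdint.h>

#define DSCP_SHIFT 2	/* assumed equal to MLX5_IPV6_HDR_DSCP_SHIFT */
#define ECN_MASK   0x3u	/* assumed equal to MLX5_IPV6_HDR_ECN_MASK */

static inline uint8_t
ipv6_traffic_class(uint8_t dscp, uint8_t ecn)
{
	/* DSCP occupies bits 7..2 of the byte, ECN bits 1..0. */
	return (uint8_t)((dscp << DSCP_SHIFT) | (ecn & ECN_MASK));
}
```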
Fixes: ec1e7a5ceb69 ("net/mlx5: update IPv6 traffic class modification") Cc: stable@dpdk.org Signed-off-by: Gavin Li Acked-by: Bing Zhao --- drivers/net/mlx5/mlx5_flow.h | 7 +++++++ drivers/net/mlx5/mlx5_flow_dv.c | 6 ------ drivers/net/mlx5/mlx5_flow_hw.c | 5 +++++ 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h index 26d9e80a13a..e332226fffe 100644 --- a/drivers/net/mlx5/mlx5_flow.h +++ b/drivers/net/mlx5/mlx5_flow.h @@ -3714,6 +3714,13 @@ flow_hw_get_ipv6_route_ext_mod_id_from_ctx(void *dr_ctx, uint8_t idx) #endif return 0; } + +static inline bool +mlx5_dv_modify_ipv6_traffic_class_supported(struct mlx5_priv *priv) +{ + return priv->sh->phdev->config.ipv6_tc_fallback == MLX5_IPV6_TC_OK; +} + void mlx5_indirect_list_handles_release(struct rte_eth_dev *dev); diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c index 95ca57e8c46..83046418c42 100644 --- a/drivers/net/mlx5/mlx5_flow_dv.c +++ b/drivers/net/mlx5/mlx5_flow_dv.c @@ -1638,12 +1638,6 @@ mlx5_modify_flex_item(const struct rte_eth_dev *dev, } } -static inline bool -mlx5_dv_modify_ipv6_traffic_class_supported(struct mlx5_priv *priv) -{ - return priv->sh->phdev->config.ipv6_tc_fallback == MLX5_IPV6_TC_OK; -} - void mlx5_flow_field_id_to_modify_info (const struct rte_flow_field_data *data, diff --git a/drivers/net/mlx5/mlx5_flow_hw.c b/drivers/net/mlx5/mlx5_flow_hw.c index de400536c7d..e0f79932a56 100644 --- a/drivers/net/mlx5/mlx5_flow_hw.c +++ b/drivers/net/mlx5/mlx5_flow_hw.c @@ -1625,6 +1625,11 @@ flow_hw_modify_field_compile(struct rte_eth_dev *dev, value = *(const uint8_t *)item.spec << 24; value = rte_cpu_to_be_32(value); item.spec = &value; + } else if (conf->dst.field == RTE_FLOW_FIELD_IPV6_DSCP && + !(mask[0] & MLX5_IPV6_HDR_ECN_MASK) && + mlx5_dv_modify_ipv6_traffic_class_supported(dev->data->dev_private)) { + value = *(const unaligned_uint32_t *)item.spec << MLX5_IPV6_HDR_DSCP_SHIFT; + item.spec = &value; } } else { type = conf->operation == RTE_FLOW_MODIFY_SET ? From 472b0994319198090e44a7c2de1e43f0a0e0a270 Mon Sep 17 00:00:00 2001 From: Gregory Etelson Date: Wed, 12 Nov 2025 18:24:40 +0200 Subject: [PATCH 63/99] net/mlx5: fix send to kernel action resources release In the MLX5 PMD hierarchy a flow table is bound to a domain object. A domain object can be released if it does not reference any flow tables. When the PMD creates a send to kernel flow action, it also creates a dedicated flow table for that action. The PMD called for Rx, Tx and FDB domain destruction before it destroyed send to kernel resources - flow action and flow table. As a result, domain destruction could not be completed. The patch moves send to kernel actions and tables destruction before domain destruction. 
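The ordering constraint can be summarized in a short sketch; the types and callbacks below are hypothetical stand-ins for the PMD's internals, kept only to show that resources referencing a domain must be released before the domain itself:

```
#include <stddef.h>

/* Hypothetical holder: the table keeps a reference on the domain. */
struct dr_resources {
	void *send_to_kernel_tbl;
	void *domain;
};

static void
dr_resources_teardown(struct dr_resources *res,
		      void (*tbl_release)(void *),
		      void (*domain_destroy)(void *))
{
	/* Release the referencing table first... */
	if (res->send_to_kernel_tbl != NULL) {
		tbl_release(res->send_to_kernel_tbl);
		res->send_to_kernel_tbl = NULL;
	}
	/* ...then the domain has no references left and can be destroyed. */
	if (res->domain != NULL) {
		domain_destroy(res->domain);
		res->domain = NULL;
	}
}
```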
Fixes: f31a141e6478 ("net/mlx5: add send to kernel action resource holder") Cc: stable@dpdk.org Signed-off-by: Gregory Etelson Acked-by: Dariusz Sosnowski --- drivers/net/mlx5/linux/mlx5_os.c | 40 ++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c index dba3b61b68d..8026a68702a 100644 --- a/drivers/net/mlx5/linux/mlx5_os.c +++ b/drivers/net/mlx5/linux/mlx5_os.c @@ -737,6 +737,30 @@ mlx5_alloc_shared_dr(struct rte_eth_dev *eth_dev) return err; } +#ifdef HAVE_MLX5DV_DR +static void +mlx5_destroy_send_to_kernel_action(struct mlx5_dev_ctx_shared *sh) +{ + int i; + + for (i = 0; i < MLX5DR_TABLE_TYPE_MAX; i++) { + if (sh->send_to_kernel_action[i].action) { + void *action = sh->send_to_kernel_action[i].action; + + mlx5_glue->destroy_flow_action(action); + sh->send_to_kernel_action[i].action = NULL; + } + if (sh->send_to_kernel_action[i].tbl) { + struct mlx5_flow_tbl_resource *tbl = + sh->send_to_kernel_action[i].tbl; + + flow_dv_tbl_resource_release(sh, tbl); + sh->send_to_kernel_action[i].tbl = NULL; + } + } +} +#endif /* HAVE_MLX5DV_DR */ + /** * Destroy DR related data within private structure. * @@ -763,6 +787,7 @@ mlx5_os_free_shared_dr(struct mlx5_priv *priv) priv->dev_data->port_id, i); MLX5_ASSERT(LIST_EMPTY(&sh->shared_rxqs)); #ifdef HAVE_MLX5DV_DR + mlx5_destroy_send_to_kernel_action(sh); if (sh->rx_domain) { mlx5_glue->dr_destroy_domain(sh->rx_domain); sh->rx_domain = NULL; @@ -785,21 +810,6 @@ mlx5_os_free_shared_dr(struct mlx5_priv *priv) mlx5_glue->destroy_flow_action(sh->pop_vlan_action); sh->pop_vlan_action = NULL; } - for (i = 0; i < MLX5DR_TABLE_TYPE_MAX; i++) { - if (sh->send_to_kernel_action[i].action) { - void *action = sh->send_to_kernel_action[i].action; - - mlx5_glue->destroy_flow_action(action); - sh->send_to_kernel_action[i].action = NULL; - } - if (sh->send_to_kernel_action[i].tbl) { - struct mlx5_flow_tbl_resource *tbl = - sh->send_to_kernel_action[i].tbl; - - flow_dv_tbl_resource_release(sh, tbl); - sh->send_to_kernel_action[i].tbl = NULL; - } - } #endif /* HAVE_MLX5DV_DR */ if (sh->default_miss_action) mlx5_glue->destroy_flow_action From 1c23465a9a34a59576165945a6905dbb4f956431 Mon Sep 17 00:00:00 2001 From: Gregory Etelson Date: Thu, 13 Nov 2025 09:17:45 +0200 Subject: [PATCH 64/99] net/mlx5: fix cross GVMI metadata match for E-Switch setup The metadata flow item did not check for cross GVMI support. 
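The effect of the one-line change below can be condensed into the following sketch; the boolean parameters are illustrative names, not the PMD's actual configuration fields:

```
#include <stdbool.h>

/* Returns true when the META item must be carried in REG_C_1:
 * either in META32 HWS mode or when metadata is passed across
 * GVMI (function) boundaries in an E-Switch setup.
 */
static bool
meta_uses_reg_c_1(bool esw_en, bool meta32_hws, bool esw_meta_passing)
{
	return esw_en && (meta32_hws || esw_meta_passing);
}
```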
Fixes: a78425ba3793 ("net/mlx5: support flow metadata between E-Switch and VM") Signed-off-by: Gregory Etelson Acked-by: Bing Zhao --- drivers/net/mlx5/mlx5_flow.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h index e332226fffe..83a4adc971f 100644 --- a/drivers/net/mlx5/mlx5_flow.h +++ b/drivers/net/mlx5/mlx5_flow.h @@ -1810,7 +1810,8 @@ flow_hw_get_reg_id_by_domain(struct rte_eth_dev *dev, switch (type) { case RTE_FLOW_ITEM_TYPE_META: if (sh->config.dv_esw_en && - sh->config.dv_xmeta_en == MLX5_XMETA_MODE_META32_HWS) { + (sh->config.dv_xmeta_en == MLX5_XMETA_MODE_META32_HWS || + mlx5_esw_metadata_passing_enabled(sh))) { return REG_C_1; } if ((mlx5_vport_rx_metadata_passing_enabled(sh) && From c41f621786ff2892ac3504368c966230a41acda3 Mon Sep 17 00:00:00 2001 From: Dariusz Sosnowski Date: Thu, 13 Nov 2025 14:17:58 +0100 Subject: [PATCH 65/99] net/mlx5: remove representor matching devarg As announced in 25.07 release deprecation notice, this patch removes repr_matching_en device argument from mlx5 driver. Applications which disabled this option were able to receive traffic from any physical port/VF/SF on any representor. Specifically, in most cases, this was used to process all traffic on representor which is a transfer proxy port. Similar behavior in mlx5 PMD can be achieved without the use of additional device arguments, by using RTE_FLOW_ACTION_TYPE_RSS flow action in transfer flow rules. Signed-off-by: Dariusz Sosnowski Acked-by: Bing Zhao --- doc/guides/nics/mlx5.rst | 24 +-- doc/guides/rel_notes/deprecation.rst | 11 -- doc/guides/rel_notes/release_25_11.rst | 8 + drivers/net/mlx5/linux/mlx5_os.c | 16 -- drivers/net/mlx5/mlx5.c | 13 -- drivers/net/mlx5/mlx5.h | 1 - drivers/net/mlx5/mlx5_flow.c | 5 +- drivers/net/mlx5/mlx5_flow.h | 7 - drivers/net/mlx5/mlx5_flow_hw.c | 194 +------------------------ drivers/net/mlx5/mlx5_trigger.c | 28 ++-- drivers/net/mlx5/mlx5_txq.c | 17 +-- 11 files changed, 24 insertions(+), 300 deletions(-) diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst index 84dfbdb61cb..2cf95578deb 100644 --- a/doc/guides/nics/mlx5.rst +++ b/doc/guides/nics/mlx5.rst @@ -758,24 +758,6 @@ for an additional list of options shared with other mlx5 drivers. ,representor=pf[0,2]vf[0-2] -- ``repr_matching_en`` parameter [int] - - - 0. If representor matching is disabled, then there will be no implicit - item added. As a result, ingress flow rules will match traffic - coming to any port, not only the port on which flow rule is created. - Because of that, default flow rules for ingress traffic cannot be created - and port starts in isolated mode by default. Port cannot be switched back - to non-isolated mode. - - - 1. If representor matching is enabled (default setting), - then each ingress pattern template has an implicit REPRESENTED_PORT - item added. Flow rules based on this pattern template will match - the vport associated with port on which rule is created. - - .. note:: - - This parameter is deprecated and will be removed in future releases. - - ``max_dump_files_num`` parameter [int] The maximum number of files per PMD entity that may be created for debug information. @@ -2359,7 +2341,6 @@ Runtime configuration The behaviour of port representors is configured with some :ref:`parameters `. -The option ``repr_matching_en`` has an impact on flow steering. Limitations ^^^^^^^^^^^ @@ -2369,9 +2350,6 @@ Limitations #. 
A driver limitation for ``RTE_FLOW_ACTION_TYPE_PORT_REPRESENTOR`` action restricts the ``port_id`` configuration to only accept the value ``0xffff``, indicating the E-Switch manager. - If the ``repr_matching_en`` parameter is enabled, the traffic will be directed - to the representor of the source virtual port (SF/VF), while if it is disabled, - the traffic will be routed based on the steering rules in the ingress domain. Examples ^^^^^^^^ @@ -3246,7 +3224,7 @@ Limitations #. Only single item is supported per pattern template. -#. In switch mode, when ``repr_matching_en`` is enabled (default setting), +#. In switch mode, matching ``RTE_FLOW_ITEM_TYPE_COMPARE`` is not supported for ``ingress`` rules. This is because an implicit ``RTE_FLOW_ITEM_TYPE_REPRESENTED_PORT`` needs to be added to the matcher, diff --git a/doc/guides/rel_notes/deprecation.rst b/doc/guides/rel_notes/deprecation.rst index 1e60ac4e826..8653d6ca03b 100644 --- a/doc/guides/rel_notes/deprecation.rst +++ b/doc/guides/rel_notes/deprecation.rst @@ -147,14 +147,3 @@ Deprecation Notices This change will not result in any feature loss, as the fallback scalar paths which have feature parity with SSE will be used in the cases where the SSE paths would have been used. - -* net/mlx5: ``repr_matching_en`` device argument is deprecated - and will be removed in DPDK 25.11 release. - With disabled representor matching, behavior of Rx datapath in mlx5 PMD - is incompatible with current DPDK representor model. - Packets from any E-Switch port can arrive on any representor, - depending only on created flow rules. - Such working model should be exposed directly in DPDK ethdev API, - without relying on flow API. - Currently there is no alternative API - providing the same functionality as with ``repr_matching_en`` set to 0. diff --git a/doc/guides/rel_notes/release_25_11.rst b/doc/guides/rel_notes/release_25_11.rst index 76cab94fb9e..83e86ada64b 100644 --- a/doc/guides/rel_notes/release_25_11.rst +++ b/doc/guides/rel_notes/release_25_11.rst @@ -244,6 +244,14 @@ Removed Items the functions ``rte_tel_data_add_array_u64`` and ``rte_tel_data_add_dict_u64`` are removed. They are replaced by ``rte_tel_data_add_array_uint`` and ``rte_tel_data_add_dict_uint`` respectively. +* net/mlx5: ``repr_matching_en`` device argument has been removed. + Applications which disabled this option were able to receive traffic + from any physical port/VF/SF on any representor port. + Specifically, in most cases, this was used to process all traffic on representor port + which is a transfer proxy port. + Similar behavior in mlx5 PMD can be achieved without this device argument, + by using ``RTE_FLOW_ACTION_TYPE_RSS`` in transfer flow rules. + API Changes ----------- diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c index 8026a68702a..e642e2664e8 100644 --- a/drivers/net/mlx5/linux/mlx5_os.c +++ b/drivers/net/mlx5/linux/mlx5_os.c @@ -1787,22 +1787,6 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev, err = EINVAL; goto error; } - /* - * If representor matching is disabled, PMD cannot create default flow rules - * to receive traffic for all ports, since implicit source port match is not added. - * Isolated mode is forced. 
- */ - if (priv->sh->config.dv_esw_en && !priv->sh->config.repr_matching) { - err = mlx5_flow_isolate(eth_dev, 1, NULL); - if (err < 0) { - err = -err; - goto error; - } - DRV_LOG(WARNING, "port %u ingress traffic is restricted to defined " - "flow rules (isolated mode) since representor " - "matching is disabled", - eth_dev->data->port_id); - } eth_dev->data->dev_flags |= RTE_ETH_DEV_FLOW_OPS_THREAD_SAFE; return eth_dev; #else diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index 0c0a6343bb3..d39319efd17 100644 --- a/drivers/net/mlx5/mlx5.c +++ b/drivers/net/mlx5/mlx5.c @@ -182,9 +182,6 @@ /* HW steering counter's query interval. */ #define MLX5_HWS_CNT_CYCLE_TIME "svc_cycle_time" -/* Device parameter to control representor matching in ingress/egress flows with HWS. */ -#define MLX5_REPR_MATCHING_EN "repr_matching_en" - /* * Alignment of the Tx queue starting address, * If not set, using separate umem and MR for each TxQ. @@ -1432,8 +1429,6 @@ mlx5_dev_args_check_handler(const char *key, const char *val, void *opaque) config->cnt_svc.service_core = tmp; } else if (strcmp(MLX5_HWS_CNT_CYCLE_TIME, key) == 0) { config->cnt_svc.cycle_time = tmp; - } else if (strcmp(MLX5_REPR_MATCHING_EN, key) == 0) { - config->repr_matching = !!tmp; } else if (strcmp(MLX5_TXQ_MEM_ALGN, key) == 0) { config->txq_mem_algn = (uint32_t)tmp; } @@ -1513,7 +1508,6 @@ mlx5_shared_dev_ctx_args_config(struct mlx5_dev_ctx_shared *sh, MLX5_FDB_DEFAULT_RULE_EN, MLX5_HWS_CNT_SERVICE_CORE, MLX5_HWS_CNT_CYCLE_TIME, - MLX5_REPR_MATCHING_EN, MLX5_TXQ_MEM_ALGN, NULL, }; @@ -1544,7 +1538,6 @@ mlx5_shared_dev_ctx_args_config(struct mlx5_dev_ctx_shared *sh, config->fdb_def_rule = 1; config->cnt_svc.cycle_time = MLX5_CNT_SVC_CYCLE_TIME_DEFAULT; config->cnt_svc.service_core = rte_get_main_lcore(); - config->repr_matching = 1; config->txq_mem_algn = log2above(alignment); if (mkvlist != NULL) { /* Process parameters. */ @@ -1599,11 +1592,6 @@ mlx5_shared_dev_ctx_args_config(struct mlx5_dev_ctx_shared *sh, config->dv_xmeta_en); config->dv_xmeta_en = MLX5_XMETA_MODE_LEGACY; } - if (config->dv_flow_en != 2 && !config->repr_matching) { - DRV_LOG(DEBUG, "Disabling representor matching is valid only " - "when HW Steering is enabled."); - config->repr_matching = 1; - } if (config->tx_pp && !sh->dev_cap.txpp_en) { DRV_LOG(ERR, "Packet pacing is not supported."); rte_errno = ENODEV; @@ -1658,7 +1646,6 @@ mlx5_shared_dev_ctx_args_config(struct mlx5_dev_ctx_shared *sh, DRV_LOG(DEBUG, "\"allow_duplicate_pattern\" is %u.", config->allow_duplicate_pattern); DRV_LOG(DEBUG, "\"fdb_def_rule_en\" is %u.", config->fdb_def_rule); - DRV_LOG(DEBUG, "\"repr_matching_en\" is %u.", config->repr_matching); DRV_LOG(DEBUG, "\"txq_mem_algn\" is %u.", config->txq_mem_algn); return 0; } diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index 7e4bfacd11a..966e802f5fd 100644 --- a/drivers/net/mlx5/mlx5.h +++ b/drivers/net/mlx5/mlx5.h @@ -397,7 +397,6 @@ struct mlx5_sh_config { uint32_t lro_allowed:1; /* Whether LRO is allowed. */ /* Allow/Prevent the duplicate rules pattern. */ uint32_t fdb_def_rule:1; /* Create FDB default jump rule */ - uint32_t repr_matching:1; /* Enable implicit vport matching in HWS FDB. */ uint32_t txq_mem_algn; /* logarithm value of the TxQ address alignment. 
*/ struct { uint16_t service_core; diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c index ed67a90a224..2c48f1b01b4 100644 --- a/drivers/net/mlx5/mlx5_flow.c +++ b/drivers/net/mlx5/mlx5_flow.c @@ -8818,10 +8818,7 @@ mlx5_flow_isolate(struct rte_eth_dev *dev, "port must be stopped first"); return -rte_errno; } - if (!enable && !priv->sh->config.repr_matching) - return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, - "isolated mode cannot be disabled when " - "representor matching is disabled"); + priv->isolated = !!enable; if (enable) dev->dev_ops = &mlx5_dev_ops_isolate; diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h index 83a4adc971f..db408d7b38b 100644 --- a/drivers/net/mlx5/mlx5_flow.h +++ b/drivers/net/mlx5/mlx5_flow.h @@ -3033,9 +3033,6 @@ struct mlx5_flow_hw_ctrl_fdb { struct rte_flow_pattern_template *port_items_tmpl; struct rte_flow_actions_template *jump_one_actions_tmpl; struct rte_flow_template_table *hw_esw_zero_tbl; - struct rte_flow_pattern_template *tx_meta_items_tmpl; - struct rte_flow_actions_template *tx_meta_actions_tmpl; - struct rte_flow_template_table *hw_tx_meta_cpy_tbl; struct rte_flow_pattern_template *lacp_rx_items_tmpl; struct rte_flow_actions_template *lacp_rx_actions_tmpl; struct rte_flow_template_table *hw_lacp_rx_tbl; @@ -3614,11 +3611,7 @@ int mlx5_flow_hw_esw_create_sq_miss_flow(struct rte_eth_dev *dev, int mlx5_flow_hw_esw_destroy_sq_miss_flow(struct rte_eth_dev *dev, uint32_t sqn, bool external); int mlx5_flow_hw_esw_create_default_jump_flow(struct rte_eth_dev *dev); -int mlx5_flow_hw_create_fdb_tx_default_mreg_copy_flow(struct rte_eth_dev *dev, - uint32_t sqn, bool external); int mlx5_flow_hw_create_nic_tx_default_mreg_copy_flow(struct rte_eth_dev *dev, uint32_t sqn); -int mlx5_flow_hw_destroy_tx_default_mreg_copy_flow(struct rte_eth_dev *dev, - uint32_t sqn, bool external); int mlx5_flow_hw_create_tx_repr_matching_flow(struct rte_eth_dev *dev, uint32_t sqn, bool external); int mlx5_flow_hw_destroy_tx_repr_matching_flow(struct rte_eth_dev *dev, diff --git a/drivers/net/mlx5/mlx5_flow_hw.c b/drivers/net/mlx5/mlx5_flow_hw.c index e0f79932a56..0c9b9500993 100644 --- a/drivers/net/mlx5/mlx5_flow_hw.c +++ b/drivers/net/mlx5/mlx5_flow_hw.c @@ -5321,7 +5321,7 @@ __translate_group(struct rte_eth_dev *dev, "group index not supported"); *table_group = group + 1; } else if (config->dv_esw_en && - (config->repr_matching || config->dv_xmeta_en == MLX5_XMETA_MODE_META32_HWS) && + config->dv_xmeta_en == MLX5_XMETA_MODE_META32_HWS && external && flow_attr->egress) { /* @@ -8577,7 +8577,7 @@ __flow_hw_pattern_validate(struct rte_eth_dev *dev, break; } case RTE_FLOW_ITEM_TYPE_REPRESENTED_PORT: - if (attr->ingress && priv->sh->config.repr_matching) + if (attr->ingress) return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, NULL, "represented port item cannot be used" @@ -9052,14 +9052,12 @@ flow_hw_pattern_template_create(struct rte_eth_dev *dev, return NULL; orig_item_nb = rc; if (priv->sh->config.dv_esw_en && - priv->sh->config.repr_matching && attr->ingress && !attr->egress && !attr->transfer) { copied_items = flow_hw_prepend_item(items, orig_item_nb, &port, error); if (!copied_items) return NULL; tmpl_items = copied_items; } else if (priv->sh->config.dv_esw_en && - priv->sh->config.repr_matching && !attr->ingress && attr->egress && !attr->transfer) { if (item_flags & MLX5_FLOW_ITEM_SQ) { DRV_LOG(DEBUG, "Port %u omitting implicit REG_C_0 match for egress " @@ -10007,7 +10005,6 @@ 
flow_hw_setup_tx_repr_tagging(struct rte_eth_dev *dev, struct rte_flow_error *er }; MLX5_ASSERT(priv->sh->config.dv_esw_en); - MLX5_ASSERT(priv->sh->config.repr_matching); priv->hw_tx_repr_tagging_pt = flow_hw_create_tx_repr_sq_pattern_tmpl(dev, error); if (!priv->hw_tx_repr_tagging_pt) @@ -10186,46 +10183,6 @@ flow_hw_create_ctrl_port_pattern_template(struct rte_eth_dev *dev, return flow_hw_pattern_template_create(dev, &attr, items, error); } -/* - * Creating a flow pattern template with all ETH packets matching. - * This template is used to set up a table for default Tx copy (Tx metadata - * to REG_C_1) flow rule usage. - * - * @param dev - * Pointer to Ethernet device. - * @param error - * Pointer to error structure. - * - * @return - * Pointer to flow pattern template on success, NULL otherwise. - */ -static struct rte_flow_pattern_template * -flow_hw_create_tx_default_mreg_copy_pattern_template(struct rte_eth_dev *dev, - struct rte_flow_error *error) -{ - struct rte_flow_pattern_template_attr tx_pa_attr = { - .relaxed_matching = 0, - .egress = 1, - }; - struct rte_flow_item_eth promisc = { - .hdr.dst_addr.addr_bytes = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, - .hdr.src_addr.addr_bytes = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, - .hdr.ether_type = 0, - }; - struct rte_flow_item eth_all[] = { - [0] = { - .type = RTE_FLOW_ITEM_TYPE_ETH, - .spec = &promisc, - .mask = &promisc, - }, - [1] = { - .type = RTE_FLOW_ITEM_TYPE_END, - }, - }; - - return flow_hw_pattern_template_create(dev, &tx_pa_attr, eth_all, error); -} - /* * Creating a flow pattern template with all LACP packets matching, only for NIC * ingress domain. @@ -10746,15 +10703,6 @@ flow_hw_cleanup_ctrl_fdb_tables(struct rte_eth_dev *dev) if (hw_ctrl_fdb->lacp_rx_items_tmpl) claim_zero(flow_hw_pattern_template_destroy(dev, hw_ctrl_fdb->lacp_rx_items_tmpl, NULL)); - /* Clean up templates used for default Tx metadata copy. */ - if (hw_ctrl_fdb->hw_tx_meta_cpy_tbl) - claim_zero(flow_hw_table_destroy(dev, hw_ctrl_fdb->hw_tx_meta_cpy_tbl, NULL)); - if (hw_ctrl_fdb->tx_meta_actions_tmpl) - claim_zero(flow_hw_actions_template_destroy(dev, hw_ctrl_fdb->tx_meta_actions_tmpl, - NULL)); - if (hw_ctrl_fdb->tx_meta_items_tmpl) - claim_zero(flow_hw_pattern_template_destroy(dev, hw_ctrl_fdb->tx_meta_items_tmpl, - NULL)); /* Clean up templates used for default FDB jump rule. */ if (hw_ctrl_fdb->hw_esw_zero_tbl) claim_zero(flow_hw_table_destroy(dev, hw_ctrl_fdb->hw_esw_zero_tbl, NULL)); @@ -10841,8 +10789,6 @@ flow_hw_create_fdb_ctrl_tables(struct rte_eth_dev *dev, struct rte_flow_error *e { struct mlx5_priv *priv = dev->data->dev_private; struct mlx5_flow_hw_ctrl_fdb *hw_ctrl_fdb; - uint32_t xmeta = priv->sh->config.dv_xmeta_en; - uint32_t repr_matching = priv->sh->config.repr_matching; uint32_t fdb_def_rule = priv->sh->config.fdb_def_rule; MLX5_ASSERT(priv->hw_ctrl_fdb == NULL); @@ -10928,32 +10874,6 @@ flow_hw_create_fdb_ctrl_tables(struct rte_eth_dev *dev, struct rte_flow_error *e goto err; } } - /* Create templates and table for default Tx metadata copy flow rule. 
*/ - if (!repr_matching && xmeta == MLX5_XMETA_MODE_META32_HWS) { - hw_ctrl_fdb->tx_meta_items_tmpl = - flow_hw_create_tx_default_mreg_copy_pattern_template(dev, error); - if (!hw_ctrl_fdb->tx_meta_items_tmpl) { - DRV_LOG(ERR, "port %u failed to Tx metadata copy pattern" - " template for control flows", dev->data->port_id); - goto err; - } - hw_ctrl_fdb->tx_meta_actions_tmpl = - flow_hw_create_tx_default_mreg_copy_actions_template(dev, error); - if (!hw_ctrl_fdb->tx_meta_actions_tmpl) { - DRV_LOG(ERR, "port %u failed to Tx metadata copy actions" - " template for control flows", dev->data->port_id); - goto err; - } - hw_ctrl_fdb->hw_tx_meta_cpy_tbl = - flow_hw_create_tx_default_mreg_copy_table - (dev, hw_ctrl_fdb->tx_meta_items_tmpl, - hw_ctrl_fdb->tx_meta_actions_tmpl, error); - if (!hw_ctrl_fdb->hw_tx_meta_cpy_tbl) { - DRV_LOG(ERR, "port %u failed to create table for default" - " Tx metadata copy flow rule", dev->data->port_id); - goto err; - } - } /* Create LACP default miss table. */ if (!priv->sh->config.lacp_by_user && priv->pf_bond >= 0 && priv->master) { hw_ctrl_fdb->lacp_rx_items_tmpl = @@ -12187,7 +12107,7 @@ __flow_hw_configure(struct rte_eth_dev *dev, if (!priv->hw_tag[i]) goto err; } - if (priv->sh->config.dv_esw_en && priv->sh->config.repr_matching) { + if (priv->sh->config.dv_esw_en) { ret = flow_hw_setup_tx_repr_tagging(dev, error); if (ret) goto err; @@ -16096,62 +16016,6 @@ mlx5_flow_hw_esw_create_default_jump_flow(struct rte_eth_dev *dev) items, 0, actions, 0, &flow_info, false); } -int -mlx5_flow_hw_create_fdb_tx_default_mreg_copy_flow(struct rte_eth_dev *dev, - uint32_t sqn, bool external) -{ - struct mlx5_priv *priv = dev->data->dev_private; - struct mlx5_rte_flow_item_sq sq_spec = { - .queue = sqn, - }; - struct rte_flow_item items[] = { - { - .type = (enum rte_flow_item_type)MLX5_RTE_FLOW_ITEM_TYPE_SQ, - .spec = &sq_spec, - }, - { - .type = RTE_FLOW_ITEM_TYPE_END, - }, - }; - struct rte_flow_action_modify_field mreg_action = { - .operation = RTE_FLOW_MODIFY_SET, - .dst = { - .field = (enum rte_flow_field_id)MLX5_RTE_FLOW_FIELD_META_REG, - .tag_index = REG_C_1, - }, - .src = { - .field = (enum rte_flow_field_id)MLX5_RTE_FLOW_FIELD_META_REG, - .tag_index = REG_A, - }, - .width = 32, - }; - struct rte_flow_action copy_reg_action[] = { - [0] = { - .type = RTE_FLOW_ACTION_TYPE_MODIFY_FIELD, - .conf = &mreg_action, - }, - [1] = { - .type = RTE_FLOW_ACTION_TYPE_JUMP, - }, - [2] = { - .type = RTE_FLOW_ACTION_TYPE_END, - }, - }; - struct mlx5_ctrl_flow_info flow_info = { - .type = MLX5_CTRL_FLOW_TYPE_TX_META_COPY, - .tx_repr_sq = sqn, - }; - - MLX5_ASSERT(priv->master); - if (!priv->dr_ctx || - !priv->hw_ctrl_fdb || - !priv->hw_ctrl_fdb->hw_tx_meta_cpy_tbl) - return 0; - return flow_hw_create_ctrl_flow(dev, dev, - priv->hw_ctrl_fdb->hw_tx_meta_cpy_tbl, - items, 0, copy_reg_action, 0, &flow_info, external); -} - int mlx5_flow_hw_create_nic_tx_default_mreg_copy_flow(struct rte_eth_dev *dev, uint32_t sqn) { @@ -16202,56 +16066,6 @@ mlx5_flow_hw_create_nic_tx_default_mreg_copy_flow(struct rte_eth_dev *dev, uint3 items, 0, copy_reg_action, 0, &flow_info, false); } -static bool -flow_hw_is_matching_tx_mreg_copy_flow(struct mlx5_ctrl_flow_entry *cf, - struct rte_eth_dev *dev, - uint32_t sqn) -{ - if (cf->owner_dev != dev) - return false; - if (cf->info.type == MLX5_CTRL_FLOW_TYPE_TX_META_COPY && cf->info.tx_repr_sq == sqn) - return true; - return false; -} - -int -mlx5_flow_hw_destroy_tx_default_mreg_copy_flow(struct rte_eth_dev *dev, uint32_t sqn, bool external) -{ - uint16_t 
port_id = dev->data->port_id; - uint16_t proxy_port_id = dev->data->port_id; - struct rte_eth_dev *proxy_dev; - struct mlx5_priv *proxy_priv; - struct mlx5_ctrl_flow_entry *cf; - struct mlx5_ctrl_flow_entry *cf_next; - int ret; - - ret = rte_flow_pick_transfer_proxy(port_id, &proxy_port_id, NULL); - if (ret) { - DRV_LOG(ERR, "Unable to pick transfer proxy port for port %u. Transfer proxy " - "port must be present for default SQ miss flow rules to exist.", - port_id); - return ret; - } - proxy_dev = &rte_eth_devices[proxy_port_id]; - proxy_priv = proxy_dev->data->dev_private; - if (!proxy_priv->dr_ctx || - !proxy_priv->hw_ctrl_fdb || - !proxy_priv->hw_ctrl_fdb->hw_tx_meta_cpy_tbl) - return 0; - cf = external ? LIST_FIRST(&proxy_priv->hw_ext_ctrl_flows) : - LIST_FIRST(&proxy_priv->hw_ctrl_flows); - while (cf != NULL) { - cf_next = LIST_NEXT(cf, next); - if (flow_hw_is_matching_tx_mreg_copy_flow(cf, dev, sqn)) { - claim_zero(flow_hw_destroy_ctrl_flow(proxy_dev, cf->flow)); - LIST_REMOVE(cf, next); - mlx5_free(cf); - } - cf = cf_next; - } - return 0; -} - int mlx5_flow_hw_create_tx_repr_matching_flow(struct rte_eth_dev *dev, uint32_t sqn, bool external) { @@ -16283,8 +16097,6 @@ mlx5_flow_hw_create_tx_repr_matching_flow(struct rte_eth_dev *dev, uint32_t sqn, .tx_repr_sq = sqn, }; - /* It is assumed that caller checked for representor matching. */ - MLX5_ASSERT(priv->sh->config.repr_matching); if (!priv->dr_ctx) { DRV_LOG(DEBUG, "Port %u must be configured for HWS, before creating " "default egress flow rules. Omitting creation.", diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c index 996c1eb6acb..c0fb98e78a5 100644 --- a/drivers/net/mlx5/mlx5_trigger.c +++ b/drivers/net/mlx5/mlx5_trigger.c @@ -1132,9 +1132,9 @@ mlx5_hw_representor_port_allowed_start(struct rte_eth_dev *dev) rte_errno = EAGAIN; return -rte_errno; } - if (priv->sh->config.repr_matching && !priv->dr_ctx) { - DRV_LOG(ERR, "Failed to start port %u: with representor matching enabled, port " - "must be configured for HWS", dev->data->port_id); + if (priv->dr_ctx == NULL) { + DRV_LOG(ERR, "Failed to start port %u: port must be configured for HWS", + dev->data->port_id); rte_errno = EINVAL; return -rte_errno; } @@ -1621,27 +1621,17 @@ mlx5_traffic_enable_hws(struct rte_eth_dev *dev) goto error; } } - if (config->dv_esw_en && config->repr_matching) { + if (config->dv_esw_en) { if (mlx5_flow_hw_create_tx_repr_matching_flow(dev, queue, false)) { mlx5_txq_release(dev, i); goto error; } } - /* - * With extended metadata enabled, the Tx metadata copy is handled by default - * Tx tagging flow rules, so default Tx flow rule is not needed. It is only - * required when representor matching is disabled. 
- */ - if (config->dv_esw_en && !config->repr_matching && - config->dv_xmeta_en == MLX5_XMETA_MODE_META32_HWS && - (priv->master || priv->representor)) { - ret = mlx5_flow_hw_create_fdb_tx_default_mreg_copy_flow(dev, queue, false); - } else if (mlx5_vport_tx_metadata_passing_enabled(priv->sh)) { - ret = mlx5_flow_hw_create_nic_tx_default_mreg_copy_flow(dev, queue); - } - if (ret != 0) { - mlx5_txq_release(dev, i); - goto error; + if (mlx5_vport_tx_metadata_passing_enabled(priv->sh)) { + if (mlx5_flow_hw_create_nic_tx_default_mreg_copy_flow(dev, queue)) { + mlx5_txq_release(dev, i); + goto error; + } } mlx5_txq_release(dev, i); } diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c index e273486809d..ad15b20e7b9 100644 --- a/drivers/net/mlx5/mlx5_txq.c +++ b/drivers/net/mlx5/mlx5_txq.c @@ -1453,20 +1453,12 @@ rte_pmd_mlx5_external_sq_enable(uint16_t port_id, uint32_t sq_num) sq_miss_created = true; } - if (priv->sh->config.repr_matching && - mlx5_flow_hw_create_tx_repr_matching_flow(dev, sq_num, true)) { + if (mlx5_flow_hw_create_tx_repr_matching_flow(dev, sq_num, true)) { if (sq_miss_created) mlx5_flow_hw_esw_destroy_sq_miss_flow(dev, sq_num, true); return -rte_errno; } - if (!priv->sh->config.repr_matching && - priv->sh->config.dv_xmeta_en == MLX5_XMETA_MODE_META32_HWS && - mlx5_flow_hw_create_fdb_tx_default_mreg_copy_flow(dev, sq_num, true)) { - if (sq_miss_created) - mlx5_flow_hw_esw_destroy_sq_miss_flow(dev, sq_num, true); - return -rte_errno; - } return 0; } #endif @@ -1510,12 +1502,7 @@ rte_pmd_mlx5_external_sq_disable(uint16_t port_id, uint32_t sq_num) if (priv->sh->config.fdb_def_rule && mlx5_flow_hw_esw_destroy_sq_miss_flow(dev, sq_num, true)) return -rte_errno; - if (priv->sh->config.repr_matching && - mlx5_flow_hw_destroy_tx_repr_matching_flow(dev, sq_num, true)) - return -rte_errno; - if (!priv->sh->config.repr_matching && - priv->sh->config.dv_xmeta_en == MLX5_XMETA_MODE_META32_HWS && - mlx5_flow_hw_destroy_tx_default_mreg_copy_flow(dev, sq_num, true)) + if (mlx5_flow_hw_destroy_tx_repr_matching_flow(dev, sq_num, true)) return -rte_errno; return 0; } From dbaed15366cb9aa66d7e0a580462a042ecfb602f Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Wed, 12 Nov 2025 09:40:15 +0200 Subject: [PATCH 66/99] net/mlx5: release representor interrupt handler The cited commit removed the representor interrupt handler cleanup by mistake. Fixes: 5cf0707fc7e9 ("net/mlx5: remove Rx queue data list from device") Signed-off-by: Roi Dayan Acked-by: Suanming Mou --- drivers/net/mlx5/mlx5.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index d39319efd17..1d07ca4293a 100644 --- a/drivers/net/mlx5/mlx5.c +++ b/drivers/net/mlx5/mlx5.c @@ -2443,6 +2443,11 @@ mlx5_dev_close(struct rte_eth_dev *dev) /* Free the eCPRI flex parser resource. */ mlx5_flex_parser_ecpri_release(dev); mlx5_flex_item_port_cleanup(dev); + if (priv->representor) { + /* Each representor has a dedicated interrupt handler */ + rte_intr_instance_free(dev->intr_handle); + dev->intr_handle = NULL; + } mlx5_indirect_list_handles_release(dev); #ifdef HAVE_MLX5_HWS_SUPPORT mlx5_nta_sample_context_free(dev); From aef94343d3d0b7e11071747f0d2fd66546d7b724 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Wed, 12 Nov 2025 09:41:31 +0200 Subject: [PATCH 67/99] common/mlx5: release unused mempool entries When creating a new mempool registration that adopts shared entries from a different mempool, the newly allocated and now unused entries need to be released. Fix it. 
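The fix follows the usual defer-free pattern: remember the superseded array while the write lock is held and free it only after unlocking, since releasing it under the same lock could dead-lock. A generic sketch with plain pthread primitives (names are hypothetical):

```
#include <pthread.h>
#include <stdlib.h>

/* Hypothetical registration record; 'mrs' may be replaced by a shared array. */
struct mp_reg { void *mrs; };

static void
adopt_shared_mrs(pthread_rwlock_t *lock, struct mp_reg *fresh, struct mp_reg *shared)
{
	void *gc = NULL;

	pthread_rwlock_wrlock(lock);
	if (shared != NULL) {
		gc = fresh->mrs;	/* newly allocated, now unused */
		fresh->mrs = shared->mrs;
	}
	pthread_rwlock_unlock(lock);
	free(gc);	/* released outside the lock; free(NULL) is a no-op */
}
```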
Fixes: 8947eebc999e ("common/mlx5: fix shared memory region ranges allocation") Signed-off-by: Roi Dayan Signed-off-by: Gregory Etelson Acked-by: Suanming Mou --- drivers/common/mlx5/mlx5_common_mr.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/common/mlx5/mlx5_common_mr.c b/drivers/common/mlx5/mlx5_common_mr.c index c41ffff2d5a..8ed988dec91 100644 --- a/drivers/common/mlx5/mlx5_common_mr.c +++ b/drivers/common/mlx5/mlx5_common_mr.c @@ -1717,18 +1717,24 @@ mlx5_mr_mempool_register_primary(struct mlx5_mr_share_cache *share_cache, * hugepage can be shared across mempools that also fit in it. */ if (share_hugepage) { + struct mlx5_mempool_mr *gc_mrs = NULL; + rte_rwlock_write_lock(&share_cache->rwlock); LIST_FOREACH(mpr, &share_cache->mempool_reg_list, next) { if (mpr->mrs[0].pmd_mr.addr == (void *)ranges[0].start) break; } if (mpr != NULL) { + /* Releasing MRs here can create a dead-lock on share_cache->rwlock */ + gc_mrs = new_mpr->mrs; new_mpr->mrs = mpr->mrs; mlx5_mempool_reg_attach(new_mpr); LIST_INSERT_HEAD(&share_cache->mempool_reg_list, new_mpr, next); } rte_rwlock_write_unlock(&share_cache->rwlock); + if (gc_mrs != NULL) + mlx5_free(gc_mrs); if (mpr != NULL) { DRV_LOG(DEBUG, "Shared MR %#x in PD %p for mempool %s with mempool %s", mpr->mrs[0].pmd_mr.lkey, pd, mp->name, From 8d1fe10768d2749d00ceb7124866c61d31164380 Mon Sep 17 00:00:00 2001 From: Nupur Uttarwar Date: Wed, 12 Nov 2025 09:47:39 +0200 Subject: [PATCH 68/99] net/mlx5/hws: fix buddy memory allocation `buddy` was erroneously declared as static. When multiple threads call this routine, they all update the same static variable, corrupting pool data and potentially causing a double free when releasing resources. Fixes: b4dd7bcb0dcb ("net/mlx5/hws: add pool and buddy") Signed-off-by: Nupur Uttarwar Acked-by: Dariusz Sosnowski --- .mailmap | 1 + drivers/net/mlx5/hws/mlx5dr_buddy.c | 1 + drivers/net/mlx5/hws/mlx5dr_pool.c | 3 +-- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.mailmap b/.mailmap index f9c5622e5dc..efc66fd48c1 100644 --- a/.mailmap +++ b/.mailmap @@ -1180,6 +1180,7 @@ Noa Ezra Nobuhiro Miki Norbert Ciosek Norbert Zulinski +Nupur Uttarwar Odi Assli Ofer Dagan Ognjen Joldzic diff --git a/drivers/net/mlx5/hws/mlx5dr_buddy.c b/drivers/net/mlx5/hws/mlx5dr_buddy.c index 394ca712176..abfbdf1be53 100644 --- a/drivers/net/mlx5/hws/mlx5dr_buddy.c +++ b/drivers/net/mlx5/hws/mlx5dr_buddy.c @@ -147,6 +147,7 @@ void mlx5dr_buddy_cleanup(struct mlx5dr_buddy_mem *buddy) simple_free(buddy->num_free); simple_free(buddy->bits); + simple_free(buddy); } int mlx5dr_buddy_alloc_mem(struct mlx5dr_buddy_mem *buddy, int order) diff --git a/drivers/net/mlx5/hws/mlx5dr_pool.c b/drivers/net/mlx5/hws/mlx5dr_pool.c index 8e3718ac42a..e383027d3e5 100644 --- a/drivers/net/mlx5/hws/mlx5dr_pool.c +++ b/drivers/net/mlx5/hws/mlx5dr_pool.c @@ -167,7 +167,7 @@ static struct mlx5dr_buddy_mem * mlx5dr_pool_buddy_get_next_buddy(struct mlx5dr_pool *pool, int idx, uint32_t order, bool *is_new_buddy) { - static struct mlx5dr_buddy_mem *buddy; + struct mlx5dr_buddy_mem *buddy; uint32_t new_buddy_size; buddy = pool->db.buddy_manager->buddies[idx]; @@ -271,7 +271,6 @@ static void mlx5dr_pool_buddy_db_uninit(struct mlx5dr_pool *pool) buddy = pool->db.buddy_manager->buddies[i]; if (buddy) { mlx5dr_buddy_cleanup(buddy); - simple_free(buddy); pool->db.buddy_manager->buddies[i] = NULL; } } From 860f6c63dbc1cc6ae6bbaca886c04b88d43a2236 Mon Sep 17 00:00:00 2001 From: Maayan Kashani Date: Thu, 13 Nov 2025 21:37:11 +0200 Subject: [PATCH 69/99] 
net/mlx5: fix device start error handling When mlx5_dev_start() fails partway through initialization, the error cleanup code unconditionally calls cleanup functions for all steps, including those that were never successfully initialized. This causes state corruption leading to incorrect behavior on subsequent start attempts. The issue manifests as: 1. First start attempt fails with -ENOMEM (expected) 2. Second start attempt returns -EINVAL instead of -ENOMEM 3. With flow isolated mode, second attempt incorrectly succeeds, leading to segfault in rte_eth_rx_burst() Root cause: The single error label cleanup path calls functions like mlx5_traffic_disable() and mlx5_flow_stop_default() even when their corresponding initialization functions (mlx5_traffic_enable() and mlx5_flow_start_default()) were never called due to earlier failure. For example, when mlx5_rxq_start() fails: - mlx5_traffic_enable() at line 1403 never executes - mlx5_flow_start_default() at line 1420 never executes - But cleanup unconditionally calls: * mlx5_traffic_disable() - destroys control flows list * mlx5_flow_stop_default() - corrupts flow metadata state This corrupts the device state, causing subsequent start attempts to fail with different errors or, in isolated mode, to incorrectly succeed with an improperly initialized device. Fix by replacing the single error label with cascading error labels (Linux kernel style). Each label cleans up only its corresponding step, then falls through to clean up earlier steps. This ensures only successfully initialized steps are cleaned up, maintaining device state consistency across failed start attempts. Bugzilla ID: 1419 Fixes: 8db7e3b69822 ("net/mlx5: change operations for non-cached flows") Cc: stable@dpdk.org Signed-off-by: Maayan Kashani Acked-by: Dariusz Sosnowski --- drivers/net/mlx5/mlx5_trigger.c | 66 +++++++++++++++++++++++---------- 1 file changed, 46 insertions(+), 20 deletions(-) diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c index c0fb98e78a5..9aa36ae9a8b 100644 --- a/drivers/net/mlx5/mlx5_trigger.c +++ b/drivers/net/mlx5/mlx5_trigger.c @@ -1226,6 +1226,11 @@ static void mlx5_dev_free_consec_tx_mem(struct rte_eth_dev *dev, bool on_stop) } } +#define SAVE_RTE_ERRNO_AND_STOP(ret, dev) do { \ + ret = rte_errno; \ + (dev)->data->dev_started = 0; \ +} while (0) + /** * DPDK callback to start the device. 
* @@ -1316,25 +1321,30 @@ mlx5_dev_start(struct rte_eth_dev *dev) if (ret) { DRV_LOG(ERR, "port %u Tx packet pacing init failed: %s", dev->data->port_id, strerror(rte_errno)); + SAVE_RTE_ERRNO_AND_STOP(ret, dev); goto error; } if (mlx5_devx_obj_ops_en(priv->sh) && priv->obj_ops.lb_dummy_queue_create) { ret = priv->obj_ops.lb_dummy_queue_create(dev); - if (ret) - goto error; + if (ret) { + SAVE_RTE_ERRNO_AND_STOP(ret, dev); + goto txpp_stop; + } } ret = mlx5_dev_allocate_consec_tx_mem(dev); if (ret) { DRV_LOG(ERR, "port %u Tx queues memory allocation failed: %s", dev->data->port_id, strerror(rte_errno)); - goto error; + SAVE_RTE_ERRNO_AND_STOP(ret, dev); + goto lb_dummy_queue_release; } ret = mlx5_txq_start(dev); if (ret) { DRV_LOG(ERR, "port %u Tx queue allocation failed: %s", dev->data->port_id, strerror(rte_errno)); - goto error; + SAVE_RTE_ERRNO_AND_STOP(ret, dev); + goto free_consec_tx_mem; } if (priv->config.std_delay_drop || priv->config.hp_delay_drop) { if (!priv->sh->dev_cap.vf && !priv->sh->dev_cap.sf && @@ -1358,7 +1368,8 @@ mlx5_dev_start(struct rte_eth_dev *dev) if (ret) { DRV_LOG(ERR, "port %u Rx queue allocation failed: %s", dev->data->port_id, strerror(rte_errno)); - goto error; + SAVE_RTE_ERRNO_AND_STOP(ret, dev); + goto txq_stop; } /* * Such step will be skipped if there is no hairpin TX queue configured @@ -1368,7 +1379,8 @@ mlx5_dev_start(struct rte_eth_dev *dev) if (ret) { DRV_LOG(ERR, "port %u hairpin auto binding failed: %s", dev->data->port_id, strerror(rte_errno)); - goto error; + SAVE_RTE_ERRNO_AND_STOP(ret, dev); + goto rxq_stop; } /* Set started flag here for the following steps like control flow. */ dev->data->dev_started = 1; @@ -1376,7 +1388,8 @@ mlx5_dev_start(struct rte_eth_dev *dev) if (ret) { DRV_LOG(ERR, "port %u Rx interrupt vector creation failed", dev->data->port_id); - goto error; + SAVE_RTE_ERRNO_AND_STOP(ret, dev); + goto rxq_stop; } mlx5_os_stats_init(dev); /* @@ -1388,7 +1401,8 @@ mlx5_dev_start(struct rte_eth_dev *dev) DRV_LOG(ERR, "port %u failed to attach indirect actions: %s", dev->data->port_id, rte_strerror(rte_errno)); - goto error; + SAVE_RTE_ERRNO_AND_STOP(ret, dev); + goto rx_intr_vec_disable; } #ifdef HAVE_MLX5_HWS_SUPPORT if (priv->sh->config.dv_flow_en == 2) { @@ -1396,7 +1410,8 @@ mlx5_dev_start(struct rte_eth_dev *dev) if (ret) { DRV_LOG(ERR, "port %u failed to update HWS tables", dev->data->port_id); - goto error; + SAVE_RTE_ERRNO_AND_STOP(ret, dev); + goto action_handle_detach; } } #endif @@ -1404,7 +1419,8 @@ mlx5_dev_start(struct rte_eth_dev *dev) if (ret) { DRV_LOG(ERR, "port %u failed to set defaults flows", dev->data->port_id); - goto error; + SAVE_RTE_ERRNO_AND_STOP(ret, dev); + goto action_handle_detach; } /* Set dynamic fields and flags into Rx queues. 
*/ mlx5_flow_rxq_dynf_set(dev); @@ -1421,12 +1437,14 @@ mlx5_dev_start(struct rte_eth_dev *dev) if (ret) { DRV_LOG(DEBUG, "port %u failed to start default actions: %s", dev->data->port_id, strerror(rte_errno)); - goto error; + SAVE_RTE_ERRNO_AND_STOP(ret, dev); + goto traffic_disable; } if (mlx5_dev_ctx_shared_mempool_subscribe(dev) != 0) { DRV_LOG(ERR, "port %u failed to subscribe for mempool life cycle: %s", dev->data->port_id, rte_strerror(rte_errno)); - goto error; + SAVE_RTE_ERRNO_AND_STOP(ret, dev); + goto stop_default; } if (mlx5_flow_is_steering_disabled()) mlx5_flow_rxq_mark_flag_set(dev); @@ -1455,19 +1473,27 @@ mlx5_dev_start(struct rte_eth_dev *dev) priv->sh->port[priv->dev_port - 1].devx_ih_port_id = (uint32_t)dev->data->port_id; return 0; -error: - ret = rte_errno; /* Save rte_errno before cleanup. */ - /* Rollback. */ - dev->data->dev_started = 0; +stop_default: mlx5_flow_stop_default(dev); +traffic_disable: mlx5_traffic_disable(dev); - mlx5_txq_stop(dev); +action_handle_detach: + mlx5_action_handle_detach(dev); +rx_intr_vec_disable: + mlx5_rx_intr_vec_disable(dev); +rxq_stop: mlx5_rxq_stop(dev); +txq_stop: + mlx5_txq_stop(dev); +free_consec_tx_mem: + mlx5_dev_free_consec_tx_mem(dev, false); +lb_dummy_queue_release: if (priv->obj_ops.lb_dummy_queue_release) priv->obj_ops.lb_dummy_queue_release(dev); - mlx5_dev_free_consec_tx_mem(dev, false); - mlx5_txpp_stop(dev); /* Stop last. */ - rte_errno = ret; /* Restore rte_errno. */ +txpp_stop: + mlx5_txpp_stop(dev); +error: + rte_errno = ret; return -rte_errno; } From 7429374afba9827a43cf2efabce14e27ccc4bdef Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 14 Nov 2025 09:49:12 -0800 Subject: [PATCH 70/99] net/mlx5: fix uninitialized variable Gcc-16 detects use of uninitialized variable. If the retry loop exits the code would do memcmp against uninitialized stack value. Resolve by initializing to zero. Bugzilla ID: 1823 Fixes: 1256805dd54d ("net/mlx5: move Linux-specific functions") Fixes: cfee94752b8f ("net/mlx5: fix link status to use wait to complete") Cc: stable@dpdk.org Signed-off-by: Stephen Hemminger Acked-by: Raslan Darawsheh --- drivers/net/mlx5/linux/mlx5_ethdev_os.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/mlx5/linux/mlx5_ethdev_os.c b/drivers/net/mlx5/linux/mlx5_ethdev_os.c index 4d8fbed1882..50997c187c5 100644 --- a/drivers/net/mlx5/linux/mlx5_ethdev_os.c +++ b/drivers/net/mlx5/linux/mlx5_ethdev_os.c @@ -512,7 +512,7 @@ int mlx5_link_update(struct rte_eth_dev *dev, int wait_to_complete) { int ret; - struct rte_eth_link dev_link; + struct rte_eth_link dev_link = { 0 }; time_t start_time = time(NULL); int retry = MLX5_GET_LINK_STATUS_RETRY_COUNT; @@ -2073,4 +2073,3 @@ int mlx5_txpp_map_hca_bar(struct rte_eth_dev *dev) rte_mem_unmap(base, MLX5_ST_SZ_BYTES(initial_seg)); return 0; } - From d5a82110efc70dbcff27a5f347ba0d82bab7e36e Mon Sep 17 00:00:00 2001 From: Dariusz Sosnowski Date: Fri, 14 Nov 2025 20:27:12 +0100 Subject: [PATCH 71/99] net/mlx5: improve error on completing table resize If flow template table was created with resizable attribute, then completing table resize could fail for 2 user-related reasons: - not all flow rules were yet updated to use the resized table, - resize was not started. Both of these were reported with the same error message i.e., "cannot complete table resize". Since PMD can distinguish these 2 cases, this patch improves the error reporting to report these 2 errors separately. 
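A condensed, illustrative stand-in for the two checks (the real logic lives in flow_hw_table_resize_complete(); the names below are local to this sketch):

```
#include <errno.h>
#include <stddef.h>
#include <stdint.h>

static int
resize_complete_check(const void *matcher, uint32_t refcnt)
{
	if (matcher == NULL)
		return -EINVAL;	/* "table resize was not started" */
	if (refcnt > 0)
		return -EBUSY;	/* "all rules not yet updated" */
	return 0;		/* old matcher can be destroyed */
}
```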
Also, this patch removes redundant __rte_unused on device parameter. Signed-off-by: Dariusz Sosnowski Acked-by: Gregory Etelson Acked-by: Bing Zhao --- drivers/net/mlx5/mlx5_flow_hw.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/mlx5/mlx5_flow_hw.c b/drivers/net/mlx5/mlx5_flow_hw.c index 0c9b9500993..3b015ec9a05 100644 --- a/drivers/net/mlx5/mlx5_flow_hw.c +++ b/drivers/net/mlx5/mlx5_flow_hw.c @@ -15305,7 +15305,7 @@ flow_hw_table_resize(struct rte_eth_dev *dev, } static int -flow_hw_table_resize_complete(__rte_unused struct rte_eth_dev *dev, +flow_hw_table_resize_complete(struct rte_eth_dev *dev, struct rte_flow_template_table *table, struct rte_flow_error *error) { @@ -15319,12 +15319,16 @@ flow_hw_table_resize_complete(__rte_unused struct rte_eth_dev *dev, return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, table, "no resizable attribute"); + if (matcher_info->matcher == NULL) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + table, "table resize was not started"); matcher_refcnt = rte_atomic_load_explicit(&matcher_info->refcnt, rte_memory_order_relaxed); - if (!matcher_info->matcher || matcher_refcnt) + if (matcher_refcnt > 0) return rte_flow_error_set(error, EBUSY, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, - table, "cannot complete table resize"); + table, "all rules not yet updated"); if (matcher_info->jump) mlx5dr_action_destroy(matcher_info->jump); ret = mlx5dr_matcher_destroy(matcher_info->matcher); From 3087db16ab13cdd6996b1f3ea8c64171c2e8fd8f Mon Sep 17 00:00:00 2001 From: Dariusz Sosnowski Date: Fri, 14 Nov 2025 21:17:20 +0100 Subject: [PATCH 72/99] net/mlx5: fix flow tag indexes support on root table Offending patch introduced support for additional flow tag indexes with HW Steering flow engine. New tag indexes will be mapped to HW registers REG_C_8 to REG_C_11, depending on HW capabilities. That patch only handled tables created on group > 0 (non-root table), where mlx5 PMD directly configures the HW. Tables and flow rules on group 0 (root table) are handled through kernel driver, and new registers were not addressed for that case. Because of that, usage of unsupported tag index in group 0 triggered an assertion in flow_dv_match_meta_reg(). This patch adds necessary definitions for REG_C_8 to REG_C_11 to make these registers usable for flow tag indexes in root table. Validation of flow tag to HW register translation is also amended to report invalid cases to the user, instead of relying on assertions. Fixes: 7e3a14423c1a ("net/mlx5/hws: support 4 additional C registers") Cc: stable@dpdk.org Signed-off-by: Dariusz Sosnowski --- drivers/common/mlx5/mlx5_prm.h | 6 ++++- drivers/net/mlx5/mlx5_flow.h | 3 ++- drivers/net/mlx5/mlx5_flow_dv.c | 40 ++++++++++++++++++++++++++++----- 3 files changed, 41 insertions(+), 8 deletions(-) diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h index 6cde3f8f1a1..9383e098933 100644 --- a/drivers/common/mlx5/mlx5_prm.h +++ b/drivers/common/mlx5/mlx5_prm.h @@ -1205,7 +1205,11 @@ struct mlx5_ifc_fte_match_set_misc5_bits { u8 tunnel_header_1[0x20]; u8 tunnel_header_2[0x20]; u8 tunnel_header_3[0x20]; - u8 reserved[0x100]; + u8 reserved[0x80]; + u8 metadata_reg_c_8[0x20]; + u8 metadata_reg_c_9[0x20]; + u8 metadata_reg_c_10[0x20]; + u8 metadata_reg_c_11[0x20]; }; /* Flow matcher. 
*/ diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h index db408d7b38b..81590085048 100644 --- a/drivers/net/mlx5/mlx5_flow.h +++ b/drivers/net/mlx5/mlx5_flow.h @@ -1837,7 +1837,8 @@ flow_hw_get_reg_id_by_domain(struct rte_eth_dev *dev, case RTE_FLOW_ITEM_TYPE_TAG: if (id == RTE_PMD_MLX5_LINEAR_HASH_TAG_INDEX) return REG_C_3; - MLX5_ASSERT(id < MLX5_FLOW_HW_TAGS_MAX); + if (id >= MLX5_FLOW_HW_TAGS_MAX) + return REG_NON; return reg->hw_avl_tags[id]; default: return REG_NON; diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c index 83046418c42..47f6d284103 100644 --- a/drivers/net/mlx5/mlx5_flow_dv.c +++ b/drivers/net/mlx5/mlx5_flow_dv.c @@ -10554,8 +10554,8 @@ static void flow_dv_match_meta_reg(void *key, enum modify_reg reg_type, uint32_t data, uint32_t mask) { - void *misc2_v = - MLX5_ADDR_OF(fte_match_param, key, misc_parameters_2); + void *misc2_v = MLX5_ADDR_OF(fte_match_param, key, misc_parameters_2); + void *misc5_v = MLX5_ADDR_OF(fte_match_param, key, misc_parameters_5); uint32_t temp; if (!key) @@ -10601,6 +10601,18 @@ flow_dv_match_meta_reg(void *key, enum modify_reg reg_type, case REG_C_7: MLX5_SET(fte_match_set_misc2, misc2_v, metadata_reg_c_7, data); break; + case REG_C_8: + MLX5_SET(fte_match_set_misc5, misc5_v, metadata_reg_c_8, data); + break; + case REG_C_9: + MLX5_SET(fte_match_set_misc5, misc5_v, metadata_reg_c_9, data); + break; + case REG_C_10: + MLX5_SET(fte_match_set_misc5, misc5_v, metadata_reg_c_10, data); + break; + case REG_C_11: + MLX5_SET(fte_match_set_misc5, misc5_v, metadata_reg_c_11, data); + break; default: MLX5_ASSERT(false); break; @@ -10819,8 +10831,11 @@ flow_dv_translate_mlx5_item_tag(struct rte_eth_dev *dev, void *key, * Flow pattern to translate. * @param[in] key_type * Set flow matcher mask or value. + * + * @return + * 0 on success. Negative errno value otherwise. */ -static void +static int flow_dv_translate_item_tag(struct rte_eth_dev *dev, void *key, const struct rte_flow_item *item, uint32_t key_type) @@ -10832,7 +10847,7 @@ flow_dv_translate_item_tag(struct rte_eth_dev *dev, void *key, uint32_t index; if (MLX5_ITEM_VALID(item, key_type)) - return; + return 0; MLX5_ITEM_UPDATE(item, key_type, tag_v, tag_m, &rte_flow_item_tag_mask); /* When set mask, the index should be from spec. 
*/ @@ -10842,8 +10857,18 @@ reg = mlx5_flow_get_reg_id(dev, MLX5_APP_TAG, index, NULL); else reg = flow_hw_get_reg_id(dev, RTE_FLOW_ITEM_TYPE_TAG, index); - MLX5_ASSERT(reg > 0); + if (reg < 0) { + DRV_LOG(ERR, "port %u tag index %u does not map to correct register", + dev->data->port_id, index); + return -EINVAL; + } + if (reg == REG_NON) { + DRV_LOG(ERR, "port %u tag index %u maps to unsupported register", + dev->data->port_id, index); + return -ENOTSUP; + } flow_dv_match_meta_reg(key, (enum modify_reg)reg, tag_v->data, tag_m->data); + return 0; } /** @@ -14402,7 +14427,10 @@ flow_dv_translate_items(struct rte_eth_dev *dev, last_item = MLX5_FLOW_LAYER_ICMP6; break; case RTE_FLOW_ITEM_TYPE_TAG: - flow_dv_translate_item_tag(dev, key, items, key_type); + ret = flow_dv_translate_item_tag(dev, key, items, key_type); + if (ret < 0) + return rte_flow_error_set(error, -ret, RTE_FLOW_ERROR_TYPE_ITEM, NULL, + "invalid flow tag item"); last_item = MLX5_FLOW_ITEM_TAG; break; case MLX5_RTE_FLOW_ITEM_TYPE_TAG: From 970309c7aa889ee06178c6476fe153e17096fc25 Mon Sep 17 00:00:00 2001 From: Itamar Gozlan Date: Sun, 16 Nov 2025 13:59:25 +0200 Subject: [PATCH 73/99] net/mlx5/hws: fix flow rule hash capability Some cases are not supported for rule hash calculation, for example when the matcher is defined as an FW matcher, or when the hash type is different from CRC32. For the case when the distribute mode is not by hash, the previous condition checked a wrong capability; this commit fixes it to perform the correct check. Fixes: 7f5e6de53aae ("net/mlx5/hws: query flow rule hash") Cc: stable@dpdk.org Signed-off-by: Itamar Gozlan Acked-by: Bing Zhao --- drivers/net/mlx5/hws/mlx5dr_rule.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/mlx5/hws/mlx5dr_rule.c b/drivers/net/mlx5/hws/mlx5dr_rule.c index 878224d8b3f..895ac858eca 100644 --- a/drivers/net/mlx5/hws/mlx5dr_rule.c +++ b/drivers/net/mlx5/hws/mlx5dr_rule.c @@ -1084,7 +1084,7 @@ int mlx5dr_rule_hash_calculate(struct mlx5dr_matcher *matcher, if (mlx5dr_matcher_req_fw_wqe(matcher) || mlx5dr_table_is_root(matcher->tbl) || - matcher->tbl->ctx->caps->access_index_mode == MLX5DR_MATCHER_INSERT_BY_HASH || + matcher->attr.distribute_mode != MLX5DR_MATCHER_DISTRIBUTE_BY_HASH || matcher->tbl->ctx->caps->flow_table_hash_type != MLX5_FLOW_TABLE_HASH_TYPE_CRC32) { DR_LOG(DEBUG, "Matcher is not supported"); rte_errno = ENOTSUP; From df19cf4aa720935a31edcb2954a88cfd038880e6 Mon Sep 17 00:00:00 2001 From: Shani Peretz Date: Sun, 16 Nov 2025 14:10:26 +0200 Subject: [PATCH 74/99] net/mlx5: fix null dereference in modify header GCC analyzer identified a code path where acts->mhdr could be NULL when dereferenced. When modify header validation fails in mlx5_tbl_translate_modify_header(), __flow_hw_action_template_destroy() sets acts->mhdr to NULL. Add a defensive NULL check in mlx5_tbl_ensure_shared_modify_header() to prevent the dereference. 
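The hazard is the classic pattern of a cleanup helper clearing a member that a later step still dereferences; a minimal reproduction with hypothetical names:

```
#include <errno.h>
#include <stddef.h>

struct acts_sketch { void *mhdr; };

/* Mirrors what the destroy path does on validation failure. */
static void
template_destroy_sketch(struct acts_sketch *a)
{
	a->mhdr = NULL;
}

/* The later step must re-check the member before using it. */
static int
ensure_shared_mhdr_sketch(struct acts_sketch *a)
{
	if (a->mhdr == NULL)
		return -EINVAL;
	/* ... safe to dereference a->mhdr here ... */
	return 0;
}
```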
Bugzilla ID: 1521 Fixes: 12f2ed3f03c8 ("net/mlx5: set modify header as shared flow action") Cc: stable@dpdk.org Signed-off-by: Shani Peretz Acked-by: Dariusz Sosnowski --- drivers/net/mlx5/mlx5_flow_hw.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/net/mlx5/mlx5_flow_hw.c b/drivers/net/mlx5/mlx5_flow_hw.c index 3b015ec9a05..2695d047c78 100644 --- a/drivers/net/mlx5/mlx5_flow_hw.c +++ b/drivers/net/mlx5/mlx5_flow_hw.c @@ -2336,9 +2336,10 @@ mlx5_tbl_translate_modify_header(struct rte_eth_dev *dev, .sz = sizeof(struct mlx5_modification_cmd) * mhdr->mhdr_cmds_num }; - if (flow_hw_validate_compiled_modify_field(dev, cfg, mhdr, error)) { + int ret = flow_hw_validate_compiled_modify_field(dev, cfg, mhdr, error); + if (ret) { __flow_hw_action_template_destroy(dev, acts); - return -rte_errno; + return ret; } acts->mhdr = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*acts->mhdr), 0, SOCKET_ID_ANY); @@ -2371,9 +2372,14 @@ mlx5_tbl_ensure_shared_modify_header(struct rte_eth_dev *dev, const struct rte_flow_attr *attr = &table_attr->flow_attr; enum mlx5dr_table_type tbl_type = get_mlx5dr_table_type(attr, table_attr->specialize, unified_fdb); - struct mlx5dr_action_mh_pattern pattern = { - .sz = sizeof(struct mlx5_modification_cmd) * acts->mhdr->mhdr_cmds_num - }; + struct mlx5dr_action_mh_pattern pattern; + + if (!acts->mhdr) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, + "translate modify_header: mhdr is NULL"); + + pattern.sz = sizeof(struct mlx5_modification_cmd) * acts->mhdr->mhdr_cmds_num; uint16_t mhdr_ix = acts->mhdr->pos; uint32_t flags = mlx5_hw_act_flag[!!attr->group][tbl_type] | MLX5DR_ACTION_FLAG_SHARED; From 327682174e1a282c1351c83aacd2b9e8f8d352f2 Mon Sep 17 00:00:00 2001 From: Nupur Uttarwar Date: Mon, 17 Nov 2025 07:05:57 +0200 Subject: [PATCH 75/99] net/mlx5: skip Rx control flow tables in isolated mode If flow isolation is enabled, skip flow_hw_create_ctrl_rx_tables because these tables are not used with flow isolation. This avoids unneeded resource allocation and also speeds up device startup. Fixes: 9fa7c1cddb85 ("net/mlx5: create control flow rules with HWS") Cc: stable@dpdk.org Signed-off-by: Nupur Uttarwar Signed-off-by: Bing Zhao Acked-by: Dariusz Sosnowski --- drivers/net/mlx5/mlx5_flow.h | 2 ++ drivers/net/mlx5/mlx5_flow_hw.c | 17 ++++------------- drivers/net/mlx5/mlx5_trigger.c | 14 +++++++++++++- 3 files changed, 19 insertions(+), 14 deletions(-) diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h index 81590085048..d7c9d4d0eac 100644 --- a/drivers/net/mlx5/mlx5_flow.h +++ b/drivers/net/mlx5/mlx5_flow.h @@ -3054,6 +3054,8 @@ struct mlx5_flow_hw_ctrl_nic { #define MLX5_CTRL_VLAN_FILTER (RTE_BIT32(6)) int mlx5_flow_hw_ctrl_flows(struct rte_eth_dev *dev, uint32_t flags); +int mlx5_flow_hw_create_ctrl_rx_tables(struct rte_eth_dev *dev); +void mlx5_flow_hw_cleanup_ctrl_rx_tables(struct rte_eth_dev *dev); /** Create a control flow rule for matching unicast DMAC with VLAN (Verbs and DV). 
*/ int mlx5_legacy_dmac_flow_create(struct rte_eth_dev *dev, const struct rte_ether_addr *addr); diff --git a/drivers/net/mlx5/mlx5_flow_hw.c b/drivers/net/mlx5/mlx5_flow_hw.c index 2695d047c78..c60f836de48 100644 --- a/drivers/net/mlx5/mlx5_flow_hw.c +++ b/drivers/net/mlx5/mlx5_flow_hw.c @@ -11183,8 +11183,8 @@ flow_hw_create_vlan(struct rte_eth_dev *dev) return 0; } -static void -flow_hw_cleanup_ctrl_rx_tables(struct rte_eth_dev *dev) +void +mlx5_flow_hw_cleanup_ctrl_rx_tables(struct rte_eth_dev *dev) { struct mlx5_priv *priv = dev->data->dev_private; unsigned int i; @@ -11469,8 +11469,8 @@ flow_hw_create_ctrl_rx_pattern_template return flow_hw_pattern_template_create(dev, &attr, items, NULL); } -static int -flow_hw_create_ctrl_rx_tables(struct rte_eth_dev *dev) +int +mlx5_flow_hw_create_ctrl_rx_tables(struct rte_eth_dev *dev) { struct mlx5_priv *priv = dev->data->dev_private; unsigned int i; @@ -11506,8 +11506,6 @@ flow_hw_create_ctrl_rx_tables(struct rte_eth_dev *dev) return 0; err: ret = rte_errno; - flow_hw_cleanup_ctrl_rx_tables(dev); - rte_errno = ret; return -ret; } @@ -11706,7 +11704,6 @@ __flow_hw_resource_release(struct rte_eth_dev *dev, bool ctx_close) flow_hw_cleanup_ctrl_fdb_tables(dev); flow_hw_cleanup_ctrl_nic_tables(dev); flow_hw_cleanup_tx_repr_tagging(dev); - flow_hw_cleanup_ctrl_rx_tables(dev); flow_hw_action_template_drop_release(dev); grp = LIST_FIRST(&priv->flow_hw_grp); while (grp) { @@ -12064,12 +12061,6 @@ __flow_hw_configure(struct rte_eth_dev *dev, ret = flow_hw_action_template_drop_init(dev, error); if (ret) goto err; - ret = flow_hw_create_ctrl_rx_tables(dev); - if (ret) { - rte_flow_error_set(error, -ret, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, - "Failed to set up Rx control flow templates"); - goto err; - } /* Initialize quotas */ if (port_attr->nb_quotas || (host_priv && host_priv->quota_ctx.devx_obj)) { ret = mlx5_flow_quota_init(dev, port_attr->nb_quotas); diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c index 9aa36ae9a8b..028844e45d6 100644 --- a/drivers/net/mlx5/mlx5_trigger.c +++ b/drivers/net/mlx5/mlx5_trigger.c @@ -1676,6 +1676,12 @@ mlx5_traffic_enable_hws(struct rte_eth_dev *dev) goto error; if (priv->isolated) return 0; + ret = mlx5_flow_hw_create_ctrl_rx_tables(dev); + if (ret) { + DRV_LOG(ERR, "Failed to set up Rx control flow templates for port %u, %d", + dev->data->port_id, -ret); + goto error; + } if (dev->data->promiscuous) flags |= MLX5_CTRL_PROMISCUOUS; if (dev->data->all_multicast) @@ -1689,6 +1695,7 @@ mlx5_traffic_enable_hws(struct rte_eth_dev *dev) error: ret = rte_errno; mlx5_flow_hw_flush_ctrl_flows(dev); + mlx5_flow_hw_cleanup_ctrl_rx_tables(dev); rte_errno = ret; return -rte_errno; } @@ -1929,8 +1936,13 @@ mlx5_traffic_disable(struct rte_eth_dev *dev) #ifdef HAVE_MLX5_HWS_SUPPORT struct mlx5_priv *priv = dev->data->dev_private; - if (priv->sh->config.dv_flow_en == 2) + if (priv->sh->config.dv_flow_en == 2) { + /* Device started flag was cleared before, this is used to derefer the Rx queues. 
*/ + priv->hws_rule_flushing = true; mlx5_flow_hw_flush_ctrl_flows(dev); + mlx5_flow_hw_cleanup_ctrl_rx_tables(dev); + priv->hws_rule_flushing = false; + } else #endif mlx5_traffic_disable_legacy(dev); From ad7db900a05397773b812e76655918086b07ab36 Mon Sep 17 00:00:00 2001 From: Maayan Kashani Date: Mon, 17 Nov 2025 09:15:36 +0200 Subject: [PATCH 76/99] net/mlx5: fix crash on flow rule destruction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The mlx5_ipool_free() function was called with a NULL pool pointer during HW flow destruction, causing a segmentation fault. This occurred when flow creation failed and the cleanup path attempted to free resources from an uninitialized flow pool. The crash happened in the following scenario: 1. During device start, a default NTA copy action flow is created 2. If the flow creation fails, mlx5_flow_hw_list_destroy() is called 3. In hw_cmpl_flow_update_or_destroy(), table->flow pool could be NULL 4. mlx5_ipool_free(table->flow, flow->idx) was called without checking if table->flow is NULL 5. Inside mlx5_ipool_free(), accessing pool->cfg.per_core_cache caused a segmentation fault due to NULL pointer dereference The fix adds two layers of protection, 1. Add NULL check for table->flow before calling mlx5_ipool_free() in hw_cmpl_flow_update_or_destroy(), consistent with the existing check for table->resource on the previous line 2. Add NULL check for pool parameter in mlx5_ipool_free() as a defensive measure to prevent similar crashes in other code paths The fix also renames the ‘flow’ field in rte_flow_template_table to ‘flow_pool’ for better code readability. Stack trace of the fault: mlx5_ipool_free (pool=0x0) at mlx5_utils.c:753 hw_cmpl_flow_update_or_destroy at mlx5_flow_hw.c:4481 mlx5_flow_hw_destroy at mlx5_flow_hw.c:14219 mlx5_flow_hw_list_destroy at mlx5_flow_hw.c:14279 flow_hw_list_create at mlx5_flow_hw.c:14415 mlx5_flow_start_default at mlx5_flow.c:8263 mlx5_dev_start at mlx5_trigger.c:1420 Fixes: 27d171b88031 ("net/mlx5: abstract flow action and enable reconfigure") Cc: stable@dpdk.org Signed-off-by: Maayan Kashani Acked-by: Bing Zhao --- drivers/net/mlx5/mlx5_flow.h | 2 +- drivers/net/mlx5/mlx5_flow_hw.c | 25 +++++++++++++------------ drivers/net/mlx5/mlx5_utils.c | 2 +- 3 files changed, 15 insertions(+), 14 deletions(-) diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h index d7c9d4d0eac..218b55d5361 100644 --- a/drivers/net/mlx5/mlx5_flow.h +++ b/drivers/net/mlx5/mlx5_flow.h @@ -1744,7 +1744,7 @@ struct rte_flow_template_table { struct rte_flow_pattern_template *its[MLX5_HW_TBL_MAX_ITEM_TEMPLATE]; /* Action templates bind to the table. */ struct mlx5_hw_action_template ats[MLX5_HW_TBL_MAX_ACTION_TEMPLATE]; - struct mlx5_indexed_pool *flow; /* The table's flow ipool. */ + struct mlx5_indexed_pool *flow_pool; /* The table's flow ipool. */ struct rte_flow_hw_aux *flow_aux; /**< Auxiliary data stored per flow. */ struct mlx5_indexed_pool *resource; /* The table's resource ipool. 
*/ struct mlx5_flow_template_table_cfg cfg; diff --git a/drivers/net/mlx5/mlx5_flow_hw.c b/drivers/net/mlx5/mlx5_flow_hw.c index c60f836de48..f8995b53cc2 100644 --- a/drivers/net/mlx5/mlx5_flow_hw.c +++ b/drivers/net/mlx5/mlx5_flow_hw.c @@ -3958,7 +3958,7 @@ flow_hw_async_flow_create_generic(struct rte_eth_dev *dev, items, pattern_template_index, actions, action_template_index, error)) return NULL; } - flow = mlx5_ipool_malloc(table->flow, &flow_idx); + flow = mlx5_ipool_malloc(table->flow_pool, &flow_idx); if (!flow) { rte_errno = ENOMEM; goto error; @@ -4048,7 +4048,7 @@ flow_hw_async_flow_create_generic(struct rte_eth_dev *dev, if (table->resource && res_idx) mlx5_ipool_free(table->resource, res_idx); if (flow_idx) - mlx5_ipool_free(table->flow, flow_idx); + mlx5_ipool_free(table->flow_pool, flow_idx); if (sub_error.cause != RTE_FLOW_ERROR_TYPE_NONE && error != NULL) *error = sub_error; else @@ -4498,7 +4498,8 @@ hw_cmpl_flow_update_or_destroy(struct rte_eth_dev *dev, if (!flow->nt_rule) { if (table->resource) mlx5_ipool_free(table->resource, res_idx); - mlx5_ipool_free(table->flow, flow->idx); + if (table->flow_pool) + mlx5_ipool_free(table->flow_pool, flow->idx); } } } @@ -4786,7 +4787,7 @@ flow_hw_q_flow_flush(struct rte_eth_dev *dev, LIST_FOREACH(tbl, &priv->flow_hw_tbl, next) { if (!tbl->cfg.external) continue; - MLX5_IPOOL_FOREACH(tbl->flow, fidx, flow) { + MLX5_IPOOL_FOREACH(tbl->flow_pool, fidx, flow) { if (flow_hw_async_flow_destroy(dev, MLX5_DEFAULT_FLUSH_QUEUE, &attr, @@ -5108,8 +5109,8 @@ flow_hw_table_create(struct rte_eth_dev *dev, goto error; tbl->cfg = *table_cfg; /* Allocate flow indexed pool. */ - tbl->flow = mlx5_ipool_create(&cfg); - if (!tbl->flow) + tbl->flow_pool = mlx5_ipool_create(&cfg); + if (!tbl->flow_pool) goto error; /* Allocate table of auxiliary flow rule structs. */ tbl->flow_aux = mlx5_malloc(MLX5_MEM_ZERO, sizeof(struct rte_flow_hw_aux) * nb_flows, @@ -5264,8 +5265,8 @@ flow_hw_table_create(struct rte_eth_dev *dev, &tbl->grp->entry); if (tbl->flow_aux) mlx5_free(tbl->flow_aux); - if (tbl->flow) - mlx5_ipool_destroy(tbl->flow); + if (tbl->flow_pool) + mlx5_ipool_destroy(tbl->flow_pool); mlx5_free(tbl); } if (error != NULL) { @@ -5495,10 +5496,10 @@ flow_hw_table_destroy(struct rte_eth_dev *dev, /* Build ipool allocated object bitmap. */ if (table->resource) mlx5_ipool_flush_cache(table->resource); - mlx5_ipool_flush_cache(table->flow); + mlx5_ipool_flush_cache(table->flow_pool); /* Check if ipool has allocated objects. 
 */
 	if (table->refcnt ||
-	    mlx5_ipool_get_next(table->flow, &fidx) ||
+	    mlx5_ipool_get_next(table->flow_pool, &fidx) ||
 	    (table->resource && mlx5_ipool_get_next(table->resource, &ridx))) {
 		DRV_LOG(WARNING, "Table %p is still in use.", (void *)table);
 		return rte_flow_error_set(error, EBUSY,
@@ -5528,7 +5529,7 @@ flow_hw_table_destroy(struct rte_eth_dev *dev,
 	if (table->resource)
 		mlx5_ipool_destroy(table->resource);
 	mlx5_free(table->flow_aux);
-	mlx5_ipool_destroy(table->flow);
+	mlx5_ipool_destroy(table->flow_pool);
 	mlx5_free(table);
 	return 0;
 }
@@ -15227,7 +15228,7 @@ flow_hw_table_resize(struct rte_eth_dev *dev,
 		return rte_flow_error_set(error, EINVAL,
 					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
 					  table, "shrinking table is not supported");
-	ret = mlx5_ipool_resize(table->flow, nb_flows, error);
+	ret = mlx5_ipool_resize(table->flow_pool, nb_flows, error);
 	if (ret)
 		return ret;
 	/*
diff --git a/drivers/net/mlx5/mlx5_utils.c b/drivers/net/mlx5/mlx5_utils.c
index cba8cc3f490..defcf80dd7d 100644
--- a/drivers/net/mlx5/mlx5_utils.c
+++ b/drivers/net/mlx5/mlx5_utils.c
@@ -748,7 +748,7 @@ mlx5_ipool_free(struct mlx5_indexed_pool *pool, uint32_t idx)
 	uint32_t trunk_idx;
 	uint32_t entry_idx;
 
-	if (!idx)
+	if (!pool || !idx)
 		return;
 	if (pool->cfg.per_core_cache) {
 		mlx5_ipool_free_cache(pool, idx);

From aff44ada9abc5831601e7dad5d48d9c6b5493f2d Mon Sep 17 00:00:00 2001
From: Bing Zhao
Date: Mon, 17 Nov 2025 09:27:52 +0200
Subject: [PATCH 77/99] net/mlx5: move auxiliary data inline

Since the auxiliary structure is associated with each rule, it can be
allocated as part of the same ipool allocation, saving the extra
overhead of the *alloc header and the unneeded CPU cycles.

Fixes: 27d171b88031 ("net/mlx5: abstract flow action and enable reconfigure")
Cc: stable@dpdk.org

Signed-off-by: Bing Zhao
Acked-by: Dariusz Sosnowski
---
 drivers/net/mlx5/linux/mlx5_os.c | 7 ++++---
 drivers/net/mlx5/mlx5_flow_hw.c  | 9 ++++-----
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index e642e2664e8..7f73183bb14 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -1643,16 +1643,17 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
 	/* Read link status in case it is up and there will be no event. */
 	mlx5_link_update(eth_dev, 0);
 	/* Watch LSC interrupts between port probe and port start.
*/ - priv->sh->port[priv->dev_port - 1].nl_ih_port_id = - eth_dev->data->port_id; + priv->sh->port[priv->dev_port - 1].nl_ih_port_id = eth_dev->data->port_id; mlx5_set_link_up(eth_dev); for (i = 0; i < MLX5_FLOW_TYPE_MAXI; i++) { icfg[i].release_mem_en = !!sh->config.reclaim_mode; if (sh->config.reclaim_mode) icfg[i].per_core_cache = 0; #ifdef HAVE_MLX5_HWS_SUPPORT - if (priv->sh->config.dv_flow_en == 2) + if (priv->sh->config.dv_flow_en == 2) { icfg[i].size = sizeof(struct rte_flow_hw) + sizeof(struct rte_flow_nt2hws); + icfg[i].size += sizeof(struct rte_flow_hw_aux); + } #endif priv->flows[i] = mlx5_ipool_create(&icfg[i]); if (!priv->flows[i]) diff --git a/drivers/net/mlx5/mlx5_flow_hw.c b/drivers/net/mlx5/mlx5_flow_hw.c index f8995b53cc2..c41b99746ff 100644 --- a/drivers/net/mlx5/mlx5_flow_hw.c +++ b/drivers/net/mlx5/mlx5_flow_hw.c @@ -13500,8 +13500,9 @@ static int flow_hw_prepare(struct rte_eth_dev *dev, (*flow)->nt2hws = (struct rte_flow_nt2hws *) ((uintptr_t)(*flow) + sizeof(struct rte_flow_hw)); (*flow)->idx = idx; - (*flow)->nt2hws->flow_aux = mlx5_malloc(MLX5_MEM_ZERO, sizeof(struct rte_flow_hw_aux), - RTE_CACHE_LINE_SIZE, rte_dev_numa_node(dev->device)); + (*flow)->nt2hws->flow_aux = (struct rte_flow_hw_aux *) + ((uintptr_t)((*flow)->nt2hws) + sizeof(struct rte_flow_nt2hws)); + if (!(*flow)->nt2hws->flow_aux) return rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, @@ -14152,10 +14153,8 @@ flow_hw_destroy(struct rte_eth_dev *dev, struct rte_flow_hw *flow) * Notice matcher destroy will take place when matcher's list is destroyed * , same as for DV. */ - if (flow->nt2hws->flow_aux) { - mlx5_free(flow->nt2hws->flow_aux); + if (flow->nt2hws->flow_aux) flow->nt2hws->flow_aux = NULL; - } if (flow->nt2hws->rix_encap_decap) { flow_encap_decap_resource_release(dev, flow->nt2hws->rix_encap_decap); flow->nt2hws->rix_encap_decap = 0; From e0b87fa079b9f35750f4d6fb71a00ab0ca19d170 Mon Sep 17 00:00:00 2001 From: Itai Sharoni Date: Mon, 17 Nov 2025 10:45:00 +0200 Subject: [PATCH 78/99] net/mlx5/windows: fix match criteria in flow creation Currently the Windows implementation hardcodes match criteria to MLX5_MATCH_OUTER_HEADERS when creating flow rules, which prevents matching on inner headers and other criteria types like NVGRE. The fix uses the matcher's match_criteria_enable attribute instead of hardcoding OUTER_HEADERS, and moves the assignment outside the action switch block to apply to all cases. NVGRE item type is also added to the supported items list. Fixes: 1d194496b9d1 ("net/mlx5: create flow rule on Windows") Cc: stable@dpdk.org Signed-off-by: Itai Sharoni Acked-by: Bing Zhao --- .mailmap | 1 + doc/guides/nics/mlx5.rst | 4 ++++ drivers/net/mlx5/windows/mlx5_flow_os.c | 4 ++-- drivers/net/mlx5/windows/mlx5_flow_os.h | 1 + 4 files changed, 8 insertions(+), 2 deletions(-) diff --git a/.mailmap b/.mailmap index efc66fd48c1..2f089326ff1 100644 --- a/.mailmap +++ b/.mailmap @@ -639,6 +639,7 @@ Ilyes Ben Hamouda Intiyaz Basha Ioana Ciornei Isaac Boukris +Itai Sharoni Itamar Gozlan Itsuro Oda Ivan Boule diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst index 2cf95578deb..51d7637fba2 100644 --- a/doc/guides/nics/mlx5.rst +++ b/doc/guides/nics/mlx5.rst @@ -1231,6 +1231,10 @@ On Windows, the features are limited: - IPv4/TCP with CVLAN filtering - L4 steering rules for port RSS of IP, UDP, TCP +- Tunnel protocol support: + + - NVGRE (requires DevX dynamic insertion mode) + .. 
_mlx5_multiproc: diff --git a/drivers/net/mlx5/windows/mlx5_flow_os.c b/drivers/net/mlx5/windows/mlx5_flow_os.c index 7a625fb880a..15c6fc56133 100644 --- a/drivers/net/mlx5/windows/mlx5_flow_os.c +++ b/drivers/net/mlx5/windows/mlx5_flow_os.c @@ -219,9 +219,9 @@ mlx5_flow_os_create_flow(void *matcher, void *match_value, default: break; } - MLX5_SET(devx_fs_rule_add_in, in, match_criteria_enable, - MLX5_MATCH_OUTER_HEADERS); } + MLX5_SET(devx_fs_rule_add_in, in, match_criteria_enable, + mlx5_matcher->attr.match_criteria_enable); *flow = mlx5_glue->devx_fs_rule_add(mlx5_matcher->ctx, in, sizeof(in)); return (*flow) ? 0 : -1; } diff --git a/drivers/net/mlx5/windows/mlx5_flow_os.h b/drivers/net/mlx5/windows/mlx5_flow_os.h index 2cd4e953256..c087bca4bef 100644 --- a/drivers/net/mlx5/windows/mlx5_flow_os.h +++ b/drivers/net/mlx5/windows/mlx5_flow_os.h @@ -47,6 +47,7 @@ mlx5_flow_os_item_supported(int item) case RTE_FLOW_ITEM_TYPE_IPV6: case RTE_FLOW_ITEM_TYPE_VLAN: case RTE_FLOW_ITEM_TYPE_ESP: + case RTE_FLOW_ITEM_TYPE_NVGRE: return true; default: return false; From 9727c1bae406fc080fdfcf3dae5c7a699e896b53 Mon Sep 17 00:00:00 2001 From: Christophe Fontaine Date: Tue, 18 Nov 2025 18:45:26 +0100 Subject: [PATCH 79/99] net: fix IPv6 link local compliance with RFC 4291 As specified in RFC 4291 section 2.5.1, link local addresses must be generated based on a modified EUI-64 interface identifier: > Modified EUI-64 format interface identifiers are formed by inverting > the "u" bit (universal/local bit in IEEE EUI-64 terminology) when > forming the interface identifier from IEEE EUI-64 identifiers. This translates to 'mac->addr_bytes[0] ^= 0x02'. Fixes: 3d6d85f58c1c ("net: add utilities for well known IPv6 address types") Cc: stable@dpdk.org Signed-off-by: Christophe Fontaine Signed-off-by: Robin Jarry --- app/test/test_net_ip6.c | 2 +- lib/net/rte_ip6.h | 9 +++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/app/test/test_net_ip6.c b/app/test/test_net_ip6.c index cfc55094030..e4642c9a39d 100644 --- a/app/test/test_net_ip6.c +++ b/app/test/test_net_ip6.c @@ -160,7 +160,7 @@ test_ipv6_llocal_from_ethernet(void) { const struct rte_ether_addr local_mac = {{0x04, 0x7b, 0xcb, 0x5c, 0x08, 0x44}}; const struct rte_ipv6_addr local_ip = - RTE_IPV6(0xfe80, 0, 0, 0, 0x047b, 0xcbff, 0xfe5c, 0x0844); + RTE_IPV6(0xfe80, 0, 0, 0, 0x067b, 0xcbff, 0xfe5c, 0x0844); struct rte_ipv6_addr ip; rte_ipv6_llocal_from_ethernet(&ip, &local_mac); diff --git a/lib/net/rte_ip6.h b/lib/net/rte_ip6.h index 98bcac3f4df..d1abf1f5d52 100644 --- a/lib/net/rte_ip6.h +++ b/lib/net/rte_ip6.h @@ -393,7 +393,7 @@ rte_ipv6_mc_scope(const struct rte_ipv6_addr *ip) /* * Generate a link-local IPv6 address from an Ethernet address as specified in - * RFC 2464, section 5. + * RFC 4291, section 2.5.1. * * @param[out] ip * The link-local IPv6 address to generate. @@ -406,7 +406,12 @@ rte_ipv6_llocal_from_ethernet(struct rte_ipv6_addr *ip, const struct rte_ether_a ip->a[0] = 0xfe; ip->a[1] = 0x80; memset(&ip->a[2], 0, 6); - ip->a[8] = mac->addr_bytes[0]; + /* + * The "u" bit (universal/local bit in IEEE EUI-64 terminology) + * must be inverted for IPv6 link local address. + * 0 means local scope, 1 means universal scope. 
+ */
+	ip->a[8] = mac->addr_bytes[0] ^ RTE_ETHER_LOCAL_ADMIN_ADDR;
 	ip->a[9] = mac->addr_bytes[1];
 	ip->a[10] = mac->addr_bytes[2];
 	ip->a[11] = 0xff;

From 554192dbe9e975402b56a8c9f5f7cee3ae15ad3c Mon Sep 17 00:00:00 2001
From: Dean Marx
Date: Fri, 7 Nov 2025 16:50:15 -0500
Subject: [PATCH 80/99] dts: add start Tx first method to testpmd shell

Add a start tx_first method to the testpmd shell, which sends a
specified number of packet bursts before starting packet forwarding.

Signed-off-by: Dean Marx
Reviewed-by: Luca Vizzarro
Reviewed-by: Patrick Robb
---
 dts/api/testpmd/__init__.py | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/dts/api/testpmd/__init__.py b/dts/api/testpmd/__init__.py
index b6f5a8f0daf..2a3ec49a765 100644
--- a/dts/api/testpmd/__init__.py
+++ b/dts/api/testpmd/__init__.py
@@ -214,6 +214,27 @@ def start(self, verify: bool = True) -> None:
             self._logger.debug(f"Failed to start packet forwarding: \n{start_cmd_output}")
             raise InteractiveCommandExecutionError("Testpmd failed to start packet forwarding.")
 
+    @_requires_started_ports
+    def start_tx_first(self, burst_num: int, verify: bool = True) -> None:
+        """Start packet forwarding after sending the specified number of packet bursts.
+
+        Args:
+            burst_num: Number of packet bursts to send before forwarding starts.
+            verify: If :data:`True`, a second start command will be sent in an attempt to verify
+                packet forwarding started as expected.
+
+        Raises:
+            InteractiveCommandExecutionError: If `verify` is :data:`True` and forwarding fails to
+                start or ports fail to come up.
+        """
+        self.send_command(f"start tx_first {burst_num}")
+        if verify:
+            # If forwarding was already started, sending "start" again should tell us
+            start_cmd_output = self.send_command("start")
+            if "Packet forwarding already started" not in start_cmd_output:
+                self._logger.debug(f"Failed to start packet forwarding: \n{start_cmd_output}")
+                raise InteractiveCommandExecutionError("Testpmd failed to start packet forwarding.")
+
     def stop(self, verify: bool = True) -> str:
         """Stop packet forwarding.

From 9a1073c2f757020da969a20f841fec81a98f9d9f Mon Sep 17 00:00:00 2001
From: Dean Marx
Date: Fri, 7 Nov 2025 16:50:16 -0500
Subject: [PATCH 81/99] dts: add kernel interface runtime config

Add methods to the Linux session class for setting an interface's
link up and for deleting an interface.

Signed-off-by: Dean Marx
Reviewed-by: Patrick Robb
---
 dts/framework/testbed_model/linux_session.py |  8 ++++++++
 dts/framework/testbed_model/os_session.py    | 16 ++++++++++++++++
 2 files changed, 24 insertions(+)

diff --git a/dts/framework/testbed_model/linux_session.py b/dts/framework/testbed_model/linux_session.py
index 1f11c3e7403..711d4d97c33 100644
--- a/dts/framework/testbed_model/linux_session.py
+++ b/dts/framework/testbed_model/linux_session.py
@@ -204,6 +204,14 @@ def bring_up_link(self, ports: Iterable[Port]) -> None:
 
         del self._lshw_net_info
 
+    def set_interface_link_up(self, name: str) -> None:
+        """Overrides :meth:`~.os_session.OSSession.set_interface_link_up`."""
+        self.send_command(f"ip link set dev {name} up", privileged=True, verify=True)
+
+    def delete_interface(self, name: str) -> None:
+        """Overrides :meth:`~.os_session.OSSession.delete_interface`."""
+        self.send_command(f"ip link delete {name}", privileged=True)
+
     @cached_property
     def devbind_script_path(self) -> PurePath:
         """The path to the dpdk-devbind.py script on the node.
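The two session helpers above are thin wrappers over ``ip link``. As a purely
illustrative sketch (the ``reset_tap_interface`` helper below is hypothetical;
only ``set_interface_link_up`` and ``delete_interface`` come from this patch),
a test suite could drive them like this:

```python
from framework.testbed_model.linux_session import LinuxSession


def reset_tap_interface(session: LinuxSession, name: str = "tap0") -> None:
    """Hypothetical helper: ensure a clean, up TAP interface for a test."""
    # Drop any stale interface left over from a previous run; the delete
    # command is sent without verification, so a missing interface is fine.
    session.delete_interface(name)
    # ... the application under test recreates the interface here ...
    # Then bring its link administratively up ("ip link set dev tap0 up").
    session.set_interface_link_up(name)
```

This is the same sequence the virtio forwarding suite later in this series
uses around its virtio-user testpmd session.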
diff --git a/dts/framework/testbed_model/os_session.py b/dts/framework/testbed_model/os_session.py index b41bb422051..b94c3e527bd 100644 --- a/dts/framework/testbed_model/os_session.py +++ b/dts/framework/testbed_model/os_session.py @@ -590,6 +590,22 @@ def bring_up_link(self, ports: Iterable[Port]) -> None: ports: The ports to apply the link up command to. """ + @abstractmethod + def set_interface_link_up(self, name: str) -> None: + """Send operating system specific command for bringing up link on specified interface. + + Args: + name: String representing logical name of port to apply the link up command to. + """ + + @abstractmethod + def delete_interface(self, name: str) -> None: + """Send operating system specific command for deleting specified interface. + + Args: + name: String representing logical name of interface to delete. + """ + @abstractmethod def configure_port_mtu(self, mtu: int, port: Port) -> None: """Configure `mtu` on `port`. From 66fe1099dea1153c1e1b65f78acd722fb6916950 Mon Sep 17 00:00:00 2001 From: Dean Marx Date: Fri, 7 Nov 2025 16:50:17 -0500 Subject: [PATCH 82/99] dts: add port list command to testpmd shell Add a command to the testpmd shell for setting the portlist (list of forwarding ports) within a testpmd session. This allows for changing the forwarding order between ports. Signed-off-by: Dean Marx Reviewed-by: Patrick Robb --- dts/api/testpmd/__init__.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/dts/api/testpmd/__init__.py b/dts/api/testpmd/__init__.py index 2a3ec49a765..0891b9edbc2 100644 --- a/dts/api/testpmd/__init__.py +++ b/dts/api/testpmd/__init__.py @@ -1523,6 +1523,23 @@ def set_queue_mbuf_fast_free( f"Failed to get offload config on port {port_id}, queue {queue_id}:\n{output}" ) + def set_portlist(self, order: list[int], verify: bool = True) -> None: + """Sets the order of forwarding ports. + + Args: + order: List of integers representing the desired port ordering. + verify: If :data:`True` the output of the command will be scanned in an attempt to + verify that the portlist was successfully set. + + Raises: + InteractiveCommandExecutionError: If the portlist could not be set. + """ + order_list = ",".join(map(str, order)) + portlist_output = self.send_command(f"set portlist {order_list}") + if verify: + if "Invalid port" in portlist_output: + raise InteractiveCommandExecutionError(f"Invalid port in order {order_list}") + @_requires_started_ports def get_offload_config( self, From 4623d9ffae1c0b59e8745cbfdfcf6610b98ac37b Mon Sep 17 00:00:00 2001 From: Dean Marx Date: Fri, 7 Nov 2025 16:50:18 -0500 Subject: [PATCH 83/99] dts: add virtio forwarding test suite Add test suite covering virtio-user/vhost-user server/client forwarding scenarios with testpmd packet validation. Signed-off-by: Dean Marx Reviewed-by: Patrick Robb --- doc/api/dts/tests.TestSuite_virtio_fwd.rst | 8 + dts/tests/TestSuite_virtio_fwd.py | 197 +++++++++++++++++++++ 2 files changed, 205 insertions(+) create mode 100644 doc/api/dts/tests.TestSuite_virtio_fwd.rst create mode 100644 dts/tests/TestSuite_virtio_fwd.py diff --git a/doc/api/dts/tests.TestSuite_virtio_fwd.rst b/doc/api/dts/tests.TestSuite_virtio_fwd.rst new file mode 100644 index 00000000000..e40cc4ca2bd --- /dev/null +++ b/doc/api/dts/tests.TestSuite_virtio_fwd.rst @@ -0,0 +1,8 @@ +.. SPDX-License-Identifier: BSD-3-Clause + +virtio_fwd Test Suite +===================== + +.. 
automodule:: tests.TestSuite_virtio_fwd + :members: + :show-inheritance: diff --git a/dts/tests/TestSuite_virtio_fwd.py b/dts/tests/TestSuite_virtio_fwd.py new file mode 100644 index 00000000000..bdecdb76fd1 --- /dev/null +++ b/dts/tests/TestSuite_virtio_fwd.py @@ -0,0 +1,197 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2025 University of New Hampshire + +"""Virtio forwarding test suite. + +Verify vhost/virtio pvp and fully virtual functionalities. +""" + +from scapy.layers.inet import IP +from scapy.layers.l2 import Ether + +from api.capabilities import LinkTopology +from api.packet import send_packets_and_capture +from api.test import log, verify +from api.testpmd import TestPmd +from api.testpmd.config import PortTopology, SimpleForwardingModes +from framework.test_suite import TestSuite, func_test +from framework.testbed_model.capability import requires +from framework.testbed_model.linux_session import LinuxSession +from framework.testbed_model.virtual_device import VirtualDevice + + +class TestVirtioFwd(TestSuite): + """Virtio forwarding test suite.""" + + virtio_user_vdev = VirtualDevice( + "net_virtio_user0,mac=00:01:02:03:04:05,path=/tmp/vhost-net,server=1" + ) + vhost_user_vdev = VirtualDevice("eth_vhost0,iface=/tmp/vhost-net,client=1") + + @requires(topology_type=LinkTopology.NO_LINK) + @func_test + def virtio_server(self) -> None: + """Test virtio server packet transmission. + + Steps: + * Launch a testpmd session with a vhost-user virtual device (client side). + * Launch a testpmd session with a virtio-user virtual device (server side). + * Set the forwarding mode to mac in both sessions. + * Start packet forwarding on vhost session. + * Send a burst of packets from the virtio session. + * Stop packet forwarding on vhost session and collect packet stats. + + Verify: + * Vhost session receives packets from virtio session. + """ + with ( + TestPmd( + prefix="vhost", + no_pci=True, + memory_channels=4, + vdevs=[self.vhost_user_vdev], + ) as vhost, + TestPmd( + prefix="virtio", + no_pci=True, + memory_channels=4, + vdevs=[self.virtio_user_vdev], + ) as virtio, + ): + vhost.set_forward_mode(SimpleForwardingModes.mac) + virtio.set_forward_mode(SimpleForwardingModes.mac) + + vhost.start() + virtio.start_tx_first(burst_num=32) + vhost.stop() + + vhost_forwarding_stats, vhost_raw_output = vhost.show_port_stats_all() + + rx_packets = vhost_forwarding_stats[0].rx_packets + tx_packets = vhost_forwarding_stats[0].tx_packets + + log(f"Vhost forwarding statistics:\n{vhost_raw_output}") + + verify( + rx_packets != 0 and tx_packets != 0, + "Vhost session failed to receive packets from virtio session.", + ) + + @requires(topology_type=LinkTopology.NO_LINK) + @func_test + def virtio_server_reconnect(self) -> None: + """Test virtio server reconnection. + + Steps: + * Launch a testpmd session with a vhost-user virtual device (client side). + * Launch a testpmd session with a virtio-user virtual device (server side). + * Close the virtio session and relaunch it. + * Start packet forwarding on vhost session. + * Send a burst of packets from the virtio session. + * Stop packet forwarding on vhost session and collect packet stats. + + Verify: + * Vhost session receives packets from relaunched virtio session. 
+ """ + with TestPmd( + prefix="vhost", + no_pci=True, + memory_channels=4, + vdevs=[self.vhost_user_vdev], + ) as vhost: + with TestPmd( + prefix="virtio", + no_pci=True, + memory_channels=4, + vdevs=[self.virtio_user_vdev], + ) as virtio: + pass + # end session and reconnect + with TestPmd( + prefix="virtio", + no_pci=True, + memory_channels=4, + vdevs=[self.virtio_user_vdev], + ) as virtio: + virtio.set_forward_mode(SimpleForwardingModes.mac) + vhost.set_forward_mode(SimpleForwardingModes.mac) + + vhost.start() + virtio.start_tx_first(burst_num=32) + vhost.stop() + + vhost_forwarding_stats, vhost_raw_output = vhost.show_port_stats_all() + + rx_packets = vhost_forwarding_stats[0].rx_packets + tx_packets = vhost_forwarding_stats[0].tx_packets + + log(f"Vhost forwarding statistics:\n{vhost_raw_output}") + + verify( + rx_packets != 0 and tx_packets != 0, + "Vhost session failed to receive packets from virtio session.", + ) + + @requires(topology_type=LinkTopology.ONE_LINK) + @func_test + def pvp_loop(self) -> None: + """Test vhost/virtio physical-virtual-physical topology. + + Steps: + * Launch testpmd session with a physical NIC and virtio-user vdev + connected to a vhost-net socket. + * Configure the tap interface that is created with IP address and + set link state to UP. + * Launch second testpmd session with af_packet vdev connected to + the tap interface. + * Start packet forwarding on both testpmd sessions. + * Send 100 packets to the physical interface from external tester. + + Verify: + * Vhost session receives/forwards 100+ packets. + """ + self.sut_node = self._ctx.sut_node + if not isinstance(self.sut_node.main_session, LinuxSession): + verify(False, "Must be running on a Linux environment.") + # delete tap0 interface if it exists + self.sut_node.main_session.delete_interface(name="tap0") + with TestPmd( + prefix="virtio", + vdevs=[VirtualDevice("virtio_user0,path=/dev/vhost-net,queues=1,queue_size=1024")], + port_topology=PortTopology.chained, + ) as virtio: + self.sut_node.main_session.set_interface_link_up(name="tap0") + with TestPmd( + prefix="vhost", + no_pci=True, + vdevs=[VirtualDevice("net_af_packet0,iface=tap0")], + port_topology=PortTopology.loop, + ) as vhost: + virtio.set_forward_mode(SimpleForwardingModes.mac) + vhost.set_forward_mode(SimpleForwardingModes.mac) + + portlist_order = [0, 2, 1] if len(virtio.ports) == 3 else [0, 1] + virtio.set_portlist(order=portlist_order) + + vhost.start() + virtio.start() + + packet = Ether() / IP() + packets = [packet] * 100 + send_packets_and_capture(packets) + + vhost.stop() + virtio.stop() + + vhost_forwarding_stats, vhost_raw_output = vhost.show_port_stats_all() + + rx_packets = vhost_forwarding_stats[0].rx_packets + tx_packets = vhost_forwarding_stats[0].tx_packets + + log(f"Vhost forwarding statistics:\n{vhost_raw_output}") + + verify( + rx_packets >= 100 and tx_packets >= 100, + f"PVP loop forwarding verification failed: vhost interface RX={rx_packets}," + f" TX={tx_packets} (expected ≥100 each).", + ) From 91a226f80c8bc2253bd01dacfc3890a48cb0b898 Mon Sep 17 00:00:00 2001 From: Dean Marx Date: Thu, 28 Aug 2025 13:47:45 -0400 Subject: [PATCH 84/99] doc: add test case docstring example to DTS guide Add a section to the dts rst under How to Write a Test Suite which provides an example for how to write a test case docstring, including a steps and verify section. 
Signed-off-by: Dean Marx Reviewed-by: Luca Vizzarro --- doc/guides/tools/dts.rst | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/doc/guides/tools/dts.rst b/doc/guides/tools/dts.rst index 25c08c6a00d..fd43c4c6187 100644 --- a/doc/guides/tools/dts.rst +++ b/doc/guides/tools/dts.rst @@ -427,6 +427,21 @@ and performance test cases should use ``@perf_test``. A test suite may include any number of functional and/or performance test cases. Each suite should focus on testing a single feature (one feature = one test suite). +Test case docstrings must include a Steps and Verify section. +For example:: + + @func_test + def test_basic_link(self): + """Tests basic link status. + + Steps: + * Launch testpmd. + * Check port info. + + Verify: + * Port info shows link status is up. + """ + Setup and Teardown Hooks ~~~~~~~~~~~~~~~~~~~~~~~~ From 0c271de9cf6fcdebc9ee063e65e6d153e130d191 Mon Sep 17 00:00:00 2001 From: Nicholas Pratte Date: Wed, 12 Nov 2025 20:27:30 -0500 Subject: [PATCH 85/99] dts: rework traffic generator inheritance structure Rework TG class hierarchy to include performance traffic generators. As such, methods specific to capturing traffic have been moved to the CapturingTrafficGenerator subclass. Bugzilla ID: 1697 Signed-off-by: Nicholas Pratte Signed-off-by: Patrick Robb Reviewed-by: Dean Marx Reviewed-by: Andrew Bailey --- .../capturing_traffic_generator.py | 34 +++++++++++ .../performance_traffic_generator.py | 59 +++++++++++++++++++ .../traffic_generator/traffic_generator.py | 38 ------------ 3 files changed, 93 insertions(+), 38 deletions(-) create mode 100644 dts/framework/testbed_model/traffic_generator/performance_traffic_generator.py diff --git a/dts/framework/testbed_model/traffic_generator/capturing_traffic_generator.py b/dts/framework/testbed_model/traffic_generator/capturing_traffic_generator.py index 6f7ae022ddd..7655751d7eb 100644 --- a/dts/framework/testbed_model/traffic_generator/capturing_traffic_generator.py +++ b/dts/framework/testbed_model/traffic_generator/capturing_traffic_generator.py @@ -65,6 +65,40 @@ def is_capturing(self) -> bool: """This traffic generator can capture traffic.""" return True + def send_packet(self, packet: Packet, port: Port) -> None: + """Send `packet` and block until it is fully sent. + + Send `packet` on `port`, then wait until `packet` is fully sent. + + Args: + packet: The packet to send. + port: The egress port on the TG node. + """ + self.send_packets([packet], port) + + def send_packets(self, packets: list[Packet], port: Port) -> None: + """Send `packets` and block until they are fully sent. + + Send `packets` on `port`, then wait until `packets` are fully sent. + + Args: + packets: The packets to send. + port: The egress port on the TG node. + """ + self._logger.info(f"Sending packet{'s' if len(packets) > 1 else ''}.") + self._logger.debug(get_packet_summaries(packets)) + self._send_packets(packets, port) + + @abstractmethod + def _send_packets(self, packets: list[Packet], port: Port) -> None: + """The implementation of :method:`send_packets`. + + The subclasses must implement this method which sends `packets` on `port`. + The method should block until all `packets` are fully sent. + + What fully sent means is defined by the traffic generator. 
+ """ + def send_packets_and_capture( self, packets: list[Packet], diff --git a/dts/framework/testbed_model/traffic_generator/performance_traffic_generator.py b/dts/framework/testbed_model/traffic_generator/performance_traffic_generator.py new file mode 100644 index 00000000000..5be846361c3 --- /dev/null +++ b/dts/framework/testbed_model/traffic_generator/performance_traffic_generator.py @@ -0,0 +1,59 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2025 University of New Hampshire + +"""Traffic generators for performance tests which can generate a high number of packets.""" + +from abc import abstractmethod +from dataclasses import dataclass + +from scapy.packet import Packet + +from .traffic_generator import TrafficGenerator + + +@dataclass(slots=True) +class PerformanceTrafficStats: + """Data structure to store performance statistics for a given test run. + + Attributes: + tx_pps: Recorded tx packets per second. + tx_bps: Recorded tx bytes per second. + rx_pps: Recorded rx packets per second. + rx_bps: Recorded rx bytes per second. + frame_size: The total length of the frame. + """ + + tx_pps: float + tx_bps: float + rx_pps: float + rx_bps: float + + frame_size: int | None = None + + +class PerformanceTrafficGenerator(TrafficGenerator): + """An abstract base class for all performance-oriented traffic generators. + + Provides an intermediary interface for performance-based traffic generator. + """ + + @abstractmethod + def calculate_traffic_and_stats( + self, + packet: Packet, + duration: float, + send_mpps: int | None = None, + ) -> PerformanceTrafficStats: + """Send packet traffic and acquire associated statistics. + + If `send_mpps` is provided, attempt to transmit traffic at the `send_mpps` rate. + Otherwise, attempt to transmit at line rate. + + Args: + packet: The packet to send. + duration: Performance test duration (in seconds). + send_mpps: The millions packets per second send rate. + + Returns: + Performance statistics of the generated test. + """ diff --git a/dts/framework/testbed_model/traffic_generator/traffic_generator.py b/dts/framework/testbed_model/traffic_generator/traffic_generator.py index cac119c183c..e5f246df7a7 100644 --- a/dts/framework/testbed_model/traffic_generator/traffic_generator.py +++ b/dts/framework/testbed_model/traffic_generator/traffic_generator.py @@ -11,14 +11,10 @@ from abc import ABC, abstractmethod from typing import Any -from scapy.packet import Packet - from framework.config.test_run import TrafficGeneratorConfig from framework.logger import DTSLogger, get_dts_logger from framework.testbed_model.node import Node -from framework.testbed_model.port import Port from framework.testbed_model.topology import Topology -from framework.utils import get_packet_summaries class TrafficGenerator(ABC): @@ -57,40 +53,6 @@ def teardown(self) -> None: """Teardown the traffic generator.""" self.close() - def send_packet(self, packet: Packet, port: Port) -> None: - """Send `packet` and block until it is fully sent. - - Send `packet` on `port`, then wait until `packet` is fully sent. - - Args: - packet: The packet to send. - port: The egress port on the TG node. - """ - self.send_packets([packet], port) - - def send_packets(self, packets: list[Packet], port: Port) -> None: - """Send `packets` and block until they are fully sent. - - Send `packets` on `port`, then wait until `packets` are fully sent. - - Args: - packets: The packets to send. - port: The egress port on the TG node. 
- """ - self._logger.info(f"Sending packet{'s' if len(packets) > 1 else ''}.") - self._logger.debug(get_packet_summaries(packets)) - self._send_packets(packets, port) - - @abstractmethod - def _send_packets(self, packets: list[Packet], port: Port) -> None: - """The implementation of :method:`send_packets`. - - The subclasses must implement this method which sends `packets` on `port`. - The method should block until all `packets` are fully sent. - - What fully sent means is defined by the traffic generator. - """ - @property def is_capturing(self) -> bool: """This traffic generator can't capture traffic.""" From 581f250f908441156fc135f8cf4bffb865192e19 Mon Sep 17 00:00:00 2001 From: Nicholas Pratte Date: Wed, 12 Nov 2025 20:27:31 -0500 Subject: [PATCH 86/99] dts: add TRex traffic generator Implement the TRex traffic generator for use in the DTS framework. The provided implementation leverages TRex's stateless API automation library, via use of a Python shell. The DTS context has been modified to include a performance traffic generator in addition to a functional traffic generator. In addition, the DTS testrun state machine has been modified such that traffic generators are brought up and down as needed, and so that only one traffic generator application is running on the TG system at a time. During the testcase setup stage, the testcase type (perf or func) will be checked and the correct traffic generator brought up. For instance, if a functional TG is running from a previous test and we start a performance test, then the functional TG is stopped and the performance TG started. This is an attempt to strike a balance between the concept of having the scapy asyncsniffer always on to save on execution time, with the competing need to bring up performance traffic generators as needed. There is also an added boolean toggle for adding new shells to the current shell pool or omitting them from the shell pool in order to facilitate this new TG initialization approach. Bugzilla ID: 1697 Signed-off-by: Nicholas Pratte Signed-off-by: Patrick Robb Reviewed-by: Dean Marx Reviewed-by: Andrew Bailey --- doc/guides/tools/dts.rst | 64 ++++- dts/api/packet.py | 6 +- dts/{ => configurations}/nodes.example.yaml | 0 .../test_run.example.yaml | 6 +- .../tests_config.example.yaml | 0 dts/framework/config/test_run.py | 22 +- dts/framework/context.py | 5 +- dts/framework/remote_session/blocking_app.py | 12 +- .../remote_session/interactive_shell.py | 8 +- dts/framework/settings.py | 12 +- dts/framework/test_run.py | 52 +++- .../traffic_generator/__init__.py | 13 +- .../testbed_model/traffic_generator/scapy.py | 14 +- .../traffic_generator/traffic_generator.py | 22 ++ .../testbed_model/traffic_generator/trex.py | 259 ++++++++++++++++++ 15 files changed, 449 insertions(+), 46 deletions(-) rename dts/{ => configurations}/nodes.example.yaml (100%) rename dts/{ => configurations}/test_run.example.yaml (88%) rename dts/{ => configurations}/tests_config.example.yaml (100%) create mode 100644 dts/framework/testbed_model/traffic_generator/trex.py diff --git a/doc/guides/tools/dts.rst b/doc/guides/tools/dts.rst index fd43c4c6187..46c849a7aa5 100644 --- a/doc/guides/tools/dts.rst +++ b/doc/guides/tools/dts.rst @@ -209,7 +209,9 @@ These need to be set up on a Traffic Generator Node: #. **Traffic generator dependencies** The traffic generator running on the traffic generator node must be installed beforehand. 
- For Scapy traffic generator, only a few Python libraries need to be installed: + + For Scapy traffic generator (functional tests), + only a few Python libraries need to be installed: .. code-block:: console @@ -217,6 +219,40 @@ These need to be set up on a Traffic Generator Node: sudo pip install --upgrade pip sudo pip install scapy==2.5.0 + For TRex traffic generator (performance tests), + TRex must be downloaded and a TRex config produced for each TG NIC. + For example: + + .. code-block:: console + + wget https://trex-tgn.cisco.com/trex/release/v3.03.tar.gz + tar -xf v3.03.tar.gz + cd v3.03 + sudo ./dpdk_setup_ports.py -i + + Within the ``dpdk_setup_ports.py`` utility, follow these instructions: + + - Select MAC based config. + - Select interfaces 0 and 1 on your TG NIC. + - Do not change assumed dest to DUT MAC (just leave the default loopback). + - Print preview of the config. + - Check for device address correctness. + - Check for socket and CPU correctness (CPU/socket NUMA node should match NIC NUMA node). + - Write the file to a path on your system. + + Then, presuming you are using the ``test_run.example.yaml`` + as a template for your `test_run` config: + + - Uncomment the `performance_traffic_generator` section, + making DTS use a performance TG. + - Update the `remote_path` and config fields + to the remote path of your TRex directory + and the path to your new TRex config file. + - Update the `perf` field to enable performance testing. + + After these steps, you should be ready to run performance tests with TRex. + + #. **Hardware dependencies** The traffic generators, like DPDK, need a proper driver and firmware. @@ -249,9 +285,9 @@ DTS configuration is split into nodes and a test run, and must respect the model definitions as documented in the DTS API docs under the ``config`` page. The root of the configuration is represented by the ``Configuration`` model. -By default, DTS will try to use the ``dts/test_run.example.yaml`` +By default, DTS will try to use the ``dts/configurations/test_run.example.yaml`` :ref:`config file `, -and ``dts/nodes.example.yaml`` +and ``dts/configurations/nodes.example.yaml`` :ref:`config file ` which are templates that illustrate what can be configured in DTS. @@ -278,9 +314,9 @@ DTS is run with ``main.py`` located in the ``dts`` directory using the ``poetry options: -h, --help show this help message and exit --test-run-config-file FILE_PATH - [DTS_TEST_RUN_CFG_FILE] The configuration file that describes the test cases and DPDK build options. (default: test-run.conf.yaml) + [DTS_TEST_RUN_CFG_FILE] The configuration file that describes the test cases and DPDK build options. (default: configurations/test_run.yaml) --nodes-config-file FILE_PATH - [DTS_NODES_CFG_FILE] The configuration file that describes the SUT and TG nodes. (default: nodes.conf.yaml) + [DTS_NODES_CFG_FILE] The configuration file that describes the SUT and TG nodes. (default: configurations/nodes.yaml) --tests-config-file FILE_PATH [DTS_TESTS_CFG_FILE] Configuration file used to override variable values inside specific test suites. (default: None) --output-dir DIR_PATH, --output DIR_PATH @@ -564,20 +600,20 @@ And they both have two network ports which are physically connected to each othe .. _test_run_configuration_example: -``dts/test_run.example.yaml`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +``dts/configurations/test_run.example.yaml`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. literalinclude:: ../../../dts/test_run.example.yaml +.. 
literalinclude:: ../../../dts/configurations/test_run.example.yaml :language: yaml :start-at: # Define .. _nodes_configuration_example: -``dts/nodes.example.yaml`` -~~~~~~~~~~~~~~~~~~~~~~~~~~ +``dts/configurations/nodes.example.yaml`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. literalinclude:: ../../../dts/nodes.example.yaml +.. literalinclude:: ../../../dts/configurations/nodes.example.yaml :language: yaml :start-at: # Define @@ -590,9 +626,9 @@ to demonstrate custom test suite configuration: .. _tests_config_example: -``dts/tests_config.example.yaml`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +``dts/configurations/tests_config.example.yaml`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. literalinclude:: ../../../dts/tests_config.example.yaml +.. literalinclude:: ../../../dts/configurations/tests_config.example.yaml :language: yaml :start-at: # Define diff --git a/dts/api/packet.py b/dts/api/packet.py index b6759d4ce01..ac7f64dd179 100644 --- a/dts/api/packet.py +++ b/dts/api/packet.py @@ -85,9 +85,9 @@ def send_packets_and_capture( ) assert isinstance( - get_ctx().tg, CapturingTrafficGenerator + get_ctx().func_tg, CapturingTrafficGenerator ), "Cannot capture with a non-capturing traffic generator" - tg: CapturingTrafficGenerator = cast(CapturingTrafficGenerator, get_ctx().tg) + tg: CapturingTrafficGenerator = cast(CapturingTrafficGenerator, get_ctx().func_tg) # TODO: implement @requires for types of traffic generator packets = adjust_addresses(packets) return tg.send_packets_and_capture( @@ -108,7 +108,7 @@ def send_packets( packets: Packets to send. """ packets = adjust_addresses(packets) - get_ctx().tg.send_packets(packets, get_ctx().topology.tg_port_egress) + get_ctx().func_tg.send_packets(packets, get_ctx().topology.tg_port_egress) def get_expected_packets( diff --git a/dts/nodes.example.yaml b/dts/configurations/nodes.example.yaml similarity index 100% rename from dts/nodes.example.yaml rename to dts/configurations/nodes.example.yaml diff --git a/dts/test_run.example.yaml b/dts/configurations/test_run.example.yaml similarity index 88% rename from dts/test_run.example.yaml rename to dts/configurations/test_run.example.yaml index c90de9d68d4..c8035fccf05 100644 --- a/dts/test_run.example.yaml +++ b/dts/configurations/test_run.example.yaml @@ -23,8 +23,12 @@ dpdk: # in a subdirectory of DPDK tree root directory. Otherwise, will be using the `build_options` # to build the DPDK from source. Either `precompiled_build_dir` or `build_options` can be # defined, but not both. -traffic_generator: +func_traffic_generator: type: SCAPY +# perf_traffic_generator: +# type: TREX +# remote_path: "/opt/trex/v3.03" # The remote path of the traffic generator application. +# config: "/opt/trex_config/trex_config.yaml" # Additional configuration files. 
(Leave blank if not required) perf: false # disable performance testing func: true # enable functional testing use_virtual_functions: false # use virtual functions (VFs) instead of physical functions diff --git a/dts/tests_config.example.yaml b/dts/configurations/tests_config.example.yaml similarity index 100% rename from dts/tests_config.example.yaml rename to dts/configurations/tests_config.example.yaml diff --git a/dts/framework/config/test_run.py b/dts/framework/config/test_run.py index 71b3755d6e5..6c292a36755 100644 --- a/dts/framework/config/test_run.py +++ b/dts/framework/config/test_run.py @@ -16,7 +16,7 @@ from enum import Enum, auto, unique from functools import cached_property from pathlib import Path, PurePath -from typing import Annotated, Any, Literal, NamedTuple +from typing import Annotated, Any, Literal, NamedTuple, Optional from pydantic import ( BaseModel, @@ -396,6 +396,8 @@ class TrafficGeneratorType(str, Enum): #: SCAPY = "SCAPY" + #: + TREX = "TREX" class TrafficGeneratorConfig(FrozenModel): @@ -412,8 +414,18 @@ class ScapyTrafficGeneratorConfig(TrafficGeneratorConfig): type: Literal[TrafficGeneratorType.SCAPY] +class TrexTrafficGeneratorConfig(TrafficGeneratorConfig): + """TRex traffic generator specific configuration.""" + + type: Literal[TrafficGeneratorType.TREX] + remote_path: PurePath + config: PurePath + + #: A union type discriminating traffic generators by the `type` field. -TrafficGeneratorConfigTypes = Annotated[ScapyTrafficGeneratorConfig, Field(discriminator="type")] +TrafficGeneratorConfigTypes = Annotated[ + TrexTrafficGeneratorConfig, ScapyTrafficGeneratorConfig, Field(discriminator="type") +] #: Comma-separated list of logical cores to use. An empty string or ```any``` means use all lcores. LogicalCores = Annotated[ @@ -461,8 +473,10 @@ class TestRunConfiguration(FrozenModel): #: The DPDK configuration used to test. dpdk: DPDKConfiguration - #: The traffic generator configuration used to test. - traffic_generator: TrafficGeneratorConfigTypes + #: The traffic generator configuration used for functional tests. + func_traffic_generator: Optional[ScapyTrafficGeneratorConfig] = None + #: The traffic generator configuration used for performance tests. + perf_traffic_generator: Optional[TrexTrafficGeneratorConfig] = None #: Whether to run performance tests. perf: bool #: Whether to run functional tests. 
diff --git a/dts/framework/context.py b/dts/framework/context.py
index ae319d949f6..8f1021dc961 100644
--- a/dts/framework/context.py
+++ b/dts/framework/context.py
@@ -6,7 +6,7 @@
 import functools
 from collections.abc import Callable
 from dataclasses import MISSING, dataclass, field, fields
-from typing import TYPE_CHECKING, Any, ParamSpec, Union
+from typing import TYPE_CHECKING, Any, Optional, ParamSpec, Union
 
 from framework.exception import InternalError
 from framework.remote_session.shell_pool import ShellPool
@@ -76,7 +76,8 @@ class Context:
     topology: Topology
     dpdk_build: "DPDKBuildEnvironment"
     dpdk: "DPDKRuntimeEnvironment"
-    tg: "TrafficGenerator"
+    func_tg: Optional["TrafficGenerator"]
+    perf_tg: Optional["TrafficGenerator"]
     local: LocalContext = field(default_factory=LocalContext)
     shell_pool: ShellPool = field(default_factory=ShellPool)
diff --git a/dts/framework/remote_session/blocking_app.py b/dts/framework/remote_session/blocking_app.py
index 8de536c259b..c3b02dcc621 100644
--- a/dts/framework/remote_session/blocking_app.py
+++ b/dts/framework/remote_session/blocking_app.py
@@ -48,20 +48,23 @@ class BlockingApp(InteractiveShell, Generic[P]):
     def __init__(
         self,
         node: Node,
-        path: PurePath,
+        path: str | PurePath,
         name: str | None = None,
         privileged: bool = False,
         app_params: P | str = "",
+        add_to_shell_pool: bool = True,
     ) -> None:
         """Constructor.
 
         Args:
             node: The node to run the app on.
             path: Path to the application on the node.
             name: Name to identify this application.
             privileged: Run as privileged user.
             app_params: The application parameters. Can be of any type inheriting :class:`Params`
                 or a plain string.
+            add_to_shell_pool: If :data:`True`, the blocking app's shell will be added to the
+                shell pool.
         """
         if isinstance(app_params, str):
             params = Params()
@@ -69,11 +72,12 @@ def __init__(
             app_params = cast(P, params)
 
         self._path = path
+        self._add_to_shell_pool = add_to_shell_pool
 
         super().__init__(node, name, privileged, app_params)
 
     @property
-    def path(self) -> PurePath:
+    def path(self) -> str | PurePath:
         """The path of the DPDK app relative to the DPDK build folder."""
         return self._path
 
@@ -86,7 +90,7 @@ def wait_until_ready(self, end_token: str) -> Self:
         Returns:
             Itself.
         """
-        self.start_application(end_token)
+        self.start_application(end_token, self._add_to_shell_pool)
         return self
 
     def close(self) -> None:
diff --git a/dts/framework/remote_session/interactive_shell.py b/dts/framework/remote_session/interactive_shell.py
index ce932470512..a65cbce2097 100644
--- a/dts/framework/remote_session/interactive_shell.py
+++ b/dts/framework/remote_session/interactive_shell.py
@@ -140,7 +140,7 @@ def _make_start_command(self) -> str:
             start_command = self._node.main_session._get_privileged_command(start_command)
         return start_command
 
-    def start_application(self, prompt: str | None = None) -> None:
+    def start_application(self, prompt: str | None = None, add_to_shell_pool: bool = True) -> None:
         """Starts a new interactive application based on the path to the app.
 
         This method is often overridden by subclasses as their process for starting may look
@@ -151,6 +151,7 @@ def start_application(self, prompt: str | None = None) -> None:
         Args:
             prompt: When starting up the application, expect this string at the end of stdout when
                 the application is ready. If :data:`None`, the class' default prompt will be used.
+            add_to_shell_pool: If :data:`True`, the shell will be registered to the shell pool.
Raises: InteractiveCommandExecutionError: If the application fails to start within the allotted @@ -174,7 +175,8 @@ def start_application(self, prompt: str | None = None) -> None: self.is_alive = False # update state on failure to start raise InteractiveCommandExecutionError("Failed to start application.") self._ssh_channel.settimeout(self._timeout) - get_ctx().shell_pool.register_shell(self) + if add_to_shell_pool: + get_ctx().shell_pool.register_shell(self) def send_command( self, command: str, prompt: str | None = None, skip_first_line: bool = False @@ -259,7 +261,7 @@ def close(self) -> None: @property @abstractmethod - def path(self) -> PurePath: + def path(self) -> str | PurePath: """Path to the shell executable.""" def _make_real_path(self) -> PurePath: diff --git a/dts/framework/settings.py b/dts/framework/settings.py index 84b627a06a4..b08373b7eab 100644 --- a/dts/framework/settings.py +++ b/dts/framework/settings.py @@ -130,11 +130,17 @@ class Settings: """ #: - test_run_config_path: Path = Path(__file__).parent.parent.joinpath("test_run.yaml") + test_run_config_path: Path = Path(__file__).parent.parent.joinpath( + "configurations/test_run.yaml" + ) #: - nodes_config_path: Path = Path(__file__).parent.parent.joinpath("nodes.yaml") + nodes_config_path: Path = Path(__file__).parent.parent.joinpath("configurations/nodes.yaml") #: - tests_config_path: Path | None = None + tests_config_path: Path | None = ( + Path(__file__).parent.parent.joinpath("configurations/tests_config.yaml") + if os.path.exists("configurations/tests_config.yaml") + else None + ) #: output_dir: str = "output" #: diff --git a/dts/framework/test_run.py b/dts/framework/test_run.py index 9cf04c0b06f..ff0a12c9cee 100644 --- a/dts/framework/test_run.py +++ b/dts/framework/test_run.py @@ -113,7 +113,7 @@ from framework.remote_session.dpdk import DPDKBuildEnvironment, DPDKRuntimeEnvironment from framework.settings import SETTINGS from framework.test_result import Result, ResultNode, TestRunResult -from framework.test_suite import BaseConfig, TestCase, TestSuite +from framework.test_suite import BaseConfig, TestCase, TestCaseType, TestSuite from framework.testbed_model.capability import ( Capability, get_supported_capabilities, @@ -199,10 +199,26 @@ def __init__( dpdk_build_env = DPDKBuildEnvironment(config.dpdk.build, sut_node) dpdk_runtime_env = DPDKRuntimeEnvironment(config.dpdk, sut_node, dpdk_build_env) - traffic_generator = create_traffic_generator(config.traffic_generator, tg_node) + + func_traffic_generator = ( + create_traffic_generator(config.func_traffic_generator, tg_node) + if config.func and config.func_traffic_generator + else None + ) + perf_traffic_generator = ( + create_traffic_generator(config.perf_traffic_generator, tg_node) + if config.perf and config.perf_traffic_generator + else None + ) self.ctx = Context( - sut_node, tg_node, topology, dpdk_build_env, dpdk_runtime_env, traffic_generator + sut_node, + tg_node, + topology, + dpdk_build_env, + dpdk_runtime_env, + func_traffic_generator, + perf_traffic_generator, ) self.result = result self.selected_tests = list(self.config.filter_tests(tests_config)) @@ -335,7 +351,10 @@ def next(self) -> State | None: test_run.ctx.topology.instantiate_vf_ports() test_run.ctx.topology.configure_ports("sut", "dpdk") - test_run.ctx.tg.setup(test_run.ctx.topology) + if test_run.ctx.func_tg: + test_run.ctx.func_tg.setup(test_run.ctx.topology) + if test_run.ctx.perf_tg: + test_run.ctx.perf_tg.setup(test_run.ctx.topology) self.result.ports = [ port.to_dict() @@ -425,7 
+444,10 @@ def next(self) -> State | None: self.test_run.ctx.topology.delete_vf_ports() self.test_run.ctx.shell_pool.terminate_current_pool() - self.test_run.ctx.tg.teardown() + if self.test_run.ctx.func_tg and self.test_run.ctx.func_tg.is_setup: + self.test_run.ctx.func_tg.teardown() + if self.test_run.ctx.perf_tg and self.test_run.ctx.perf_tg.is_setup: + self.test_run.ctx.perf_tg.teardown() self.test_run.ctx.topology.teardown() self.test_run.ctx.dpdk.teardown() self.test_run.ctx.tg_node.teardown() @@ -611,6 +633,26 @@ def next(self) -> State | None: ) self.test_run.ctx.topology.configure_ports("sut", sut_ports_drivers) + if ( + self.test_run.ctx.perf_tg + and self.test_run.ctx.perf_tg.is_setup + and self.test_case.test_type is TestCaseType.FUNCTIONAL + ): + self.test_run.ctx.perf_tg.teardown() + self.test_run.ctx.topology.configure_ports("tg", "kernel") + if self.test_run.ctx.func_tg and not self.test_run.ctx.func_tg.is_setup: + self.test_run.ctx.func_tg.setup(self.test_run.ctx.topology) + + if ( + self.test_run.ctx.func_tg + and self.test_run.ctx.func_tg.is_setup + and self.test_case.test_type is TestCaseType.PERFORMANCE + ): + self.test_run.ctx.func_tg.teardown() + self.test_run.ctx.topology.configure_ports("tg", "dpdk") + if self.test_run.ctx.perf_tg and not self.test_run.ctx.perf_tg.is_setup: + self.test_run.ctx.perf_tg.setup(self.test_run.ctx.topology) + self.test_suite.set_up_test_case() self.result.mark_step_as("setup", Result.PASS) return TestCaseExecution( diff --git a/dts/framework/testbed_model/traffic_generator/__init__.py b/dts/framework/testbed_model/traffic_generator/__init__.py index 2a259a6e6c0..fca251f5344 100644 --- a/dts/framework/testbed_model/traffic_generator/__init__.py +++ b/dts/framework/testbed_model/traffic_generator/__init__.py @@ -14,17 +14,22 @@ and a capturing traffic generator is required. """ -from framework.config.test_run import ScapyTrafficGeneratorConfig, TrafficGeneratorConfig +from framework.config.test_run import ( + ScapyTrafficGeneratorConfig, + TrafficGeneratorConfig, + TrexTrafficGeneratorConfig, +) from framework.exception import ConfigurationError from framework.testbed_model.node import Node -from .capturing_traffic_generator import CapturingTrafficGenerator from .scapy import ScapyTrafficGenerator +from .traffic_generator import TrafficGenerator +from .trex import TrexTrafficGenerator def create_traffic_generator( traffic_generator_config: TrafficGeneratorConfig, node: Node -) -> CapturingTrafficGenerator: +) -> TrafficGenerator: """The factory function for creating traffic generator objects from the test run configuration. 
Args: @@ -40,5 +45,7 @@ def create_traffic_generator( match traffic_generator_config: case ScapyTrafficGeneratorConfig(): return ScapyTrafficGenerator(node, traffic_generator_config, privileged=True) + case TrexTrafficGeneratorConfig(): + return TrexTrafficGenerator(node, traffic_generator_config) case _: raise ConfigurationError(f"Unknown traffic generator: {traffic_generator_config.type}") diff --git a/dts/framework/testbed_model/traffic_generator/scapy.py b/dts/framework/testbed_model/traffic_generator/scapy.py index a31807e8e48..9e15a31c007 100644 --- a/dts/framework/testbed_model/traffic_generator/scapy.py +++ b/dts/framework/testbed_model/traffic_generator/scapy.py @@ -170,12 +170,17 @@ def stop_capturing_and_collect( finally: self.stop_capturing() - def start_application(self, prompt: str | None = None) -> None: + def start_application(self, prompt: str | None = None, add_to_shell_pool: bool = True) -> None: """Overrides :meth:`framework.remote_session.interactive_shell.start_application`. Prepares the Python shell for scapy and starts the sniffing in a new thread. + + Args: + prompt: When starting up the application, expect this string at the end of stdout when + the application is ready. If :data:`None`, the class' default prompt will be used. + add_to_shell_pool: If :data:`True`, the shell will be registered to the shell pool. """ - super().start_application(prompt) + super().start_application(prompt, add_to_shell_pool) self.send_command("from scapy.all import *") self._sniffer.start() self._is_sniffing.wait() @@ -320,15 +325,16 @@ def setup(self, topology: Topology) -> None: Binds the TG node ports to the kernel drivers and starts up the async sniffer. """ + super().setup(topology) topology.configure_ports("tg", "kernel") self._sniffer = ScapyAsyncSniffer( self._tg_node, topology.tg_port_ingress, self._sniffer_name ) - self._sniffer.start_application() + self._sniffer.start_application(add_to_shell_pool=False) self._shell = PythonShell(self._tg_node, "scapy", privileged=True) - self._shell.start_application() + self._shell.start_application(add_to_shell_pool=False) self._shell.send_command("from scapy.all import *") self._shell.send_command("from scapy.contrib.lldp import *") diff --git a/dts/framework/testbed_model/traffic_generator/traffic_generator.py b/dts/framework/testbed_model/traffic_generator/traffic_generator.py index e5f246df7a7..cdda5a7c08e 100644 --- a/dts/framework/testbed_model/traffic_generator/traffic_generator.py +++ b/dts/framework/testbed_model/traffic_generator/traffic_generator.py @@ -11,9 +11,12 @@ from abc import ABC, abstractmethod from typing import Any +from scapy.packet import Packet + from framework.config.test_run import TrafficGeneratorConfig from framework.logger import DTSLogger, get_dts_logger from framework.testbed_model.node import Node +from framework.testbed_model.port import Port from framework.testbed_model.topology import Topology @@ -30,6 +33,7 @@ class TrafficGenerator(ABC): _config: TrafficGeneratorConfig _tg_node: Node _logger: DTSLogger + _is_setup: bool def __init__(self, tg_node: Node, config: TrafficGeneratorConfig, **kwargs: Any) -> None: """Initialize the traffic generator. 
@@ -45,12 +49,25 @@ def __init__(self, tg_node: Node, config: TrafficGeneratorConfig, **kwargs: Any) self._config = config self._tg_node = tg_node self._logger = get_dts_logger(f"{self._tg_node.name} {self._config.type}") + self._is_setup = False + + def send_packets(self, packets: list[Packet], port: Port) -> None: + """Send `packets` and block until they are fully sent. + + Send `packets` on `port`, then wait until `packets` are fully sent. + + Args: + packets: The packets to send. + port: The egress port on the TG node. + """ def setup(self, topology: Topology) -> None: """Setup the traffic generator.""" + self._is_setup = True def teardown(self) -> None: """Teardown the traffic generator.""" + self._is_setup = False self.close() @property @@ -61,3 +78,8 @@ def is_capturing(self) -> bool: @abstractmethod def close(self) -> None: """Free all resources used by the traffic generator.""" + + @property + def is_setup(self) -> bool: + """Indicates whether the traffic generator application is currently running.""" + return self._is_setup diff --git a/dts/framework/testbed_model/traffic_generator/trex.py b/dts/framework/testbed_model/traffic_generator/trex.py new file mode 100644 index 00000000000..22cd20dea9a --- /dev/null +++ b/dts/framework/testbed_model/traffic_generator/trex.py @@ -0,0 +1,259 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2025 University of New Hampshire + +"""Implementation for TRex performance traffic generator.""" + +import ast +import time +from dataclasses import dataclass, field +from enum import auto +from typing import ClassVar + +from scapy.packet import Packet + +from framework.config.node import OS, NodeConfiguration +from framework.config.test_run import TrexTrafficGeneratorConfig +from framework.parser import TextParser +from framework.remote_session.blocking_app import BlockingApp +from framework.remote_session.python_shell import PythonShell +from framework.testbed_model.node import Node, create_session +from framework.testbed_model.os_session import OSSession +from framework.testbed_model.topology import Topology +from framework.testbed_model.traffic_generator.performance_traffic_generator import ( + PerformanceTrafficGenerator, + PerformanceTrafficStats, +) +from framework.utils import StrEnum + + +@dataclass(slots=True) +class TrexPerformanceTrafficStats(PerformanceTrafficStats, TextParser): + """Data structure to store performance statistics for a given test run. + + This class overrides the initialization of :class:`PerformanceTrafficStats` + in order to set the attribute values using the TRex stats output. + + Attributes: + tx_pps: Recorded tx packets per second. + tx_bps: Recorded tx bytes per second. + rx_pps: Recorded rx packets per second. + rx_bps: Recorded rx bytes per second. + frame_size: The total length of the frame. + """ + + tx_pps: int = field(metadata=TextParser.find_int(r"total.*'tx_pps': (\d+)")) + tx_bps: int = field(metadata=TextParser.find_int(r"total.*'tx_bps': (\d+)")) + rx_pps: int = field(metadata=TextParser.find_int(r"total.*'rx_pps': (\d+)")) + rx_bps: int = field(metadata=TextParser.find_int(r"total.*'rx_bps': (\d+)")) + + +class TrexStatelessTXModes(StrEnum): + """Flags indicating TRex instance's current transmission mode.""" + + #: Transmit continuously + STLTXCont = auto() + #: Transmit in a single burst + STLTXSingleBurst = auto() + #: Transmit in multiple bursts + STLTXMultiBurst = auto() + + +class TrexTrafficGenerator(PerformanceTrafficGenerator): + """TRex traffic generator. 
+ + This implementation leverages the stateless API library provided in the TRex installation. + + Attributes: + stl_client_name: The name of the stateless client used in the stateless API. + packet_stream_name: The name of the stateless packet stream used in the stateless API. + """ + + _os_session: OSSession + + _tg_config: TrexTrafficGeneratorConfig + _node_config: NodeConfiguration + + _shell: PythonShell + _python_indentation: ClassVar[str] = " " * 4 + + stl_client_name: ClassVar[str] = "client" + packet_stream_name: ClassVar[str] = "stream" + + _streaming_mode: TrexStatelessTXModes = TrexStatelessTXModes.STLTXCont + + _tg_cores: int = 10 + + _trex_app: BlockingApp + + def __init__(self, tg_node: Node, config: TrexTrafficGeneratorConfig) -> None: + """Initialize the TRex server. + + Initializes needed OS sessions for the creation of the TRex server process. + + Args: + tg_node: TG node the TRex instance is operating on. + config: Traffic generator config provided for TRex instance. + """ + assert ( + tg_node.config.os == OS.linux + ), "Linux is the only supported OS for trex traffic generation" + + super().__init__(tg_node=tg_node, config=config) + self._tg_node_config = tg_node.config + self._tg_config = config + + self._os_session = create_session(self._tg_node.config, "TRex", self._logger) + + def setup(self, topology: Topology): + """Initialize and start a TRex server process.""" + super().setup(topology) + + self._shell = PythonShell(self._tg_node, "TRex-client", privileged=True) + + # Start TRex server process. + trex_app_path = f"cd {self._tg_config.remote_path} && ./t-rex-64" + self._trex_app = BlockingApp( + node=self._tg_node, + path=trex_app_path, + name="trex-tg", + privileged=True, + app_params=f"--cfg {self._tg_config.config} -c {self._tg_cores} -i", + add_to_shell_pool=False, + ) + self._trex_app.wait_until_ready("-Per port stats table") + + self._shell.start_application() + self._shell.send_command("import os") + self._shell.send_command( + f"os.chdir('{self._tg_config.remote_path}/automation/trex_control_plane/interactive')" + ) + + # Import stateless API components. + imports = [ + "import trex", + "import trex.stl", + "import trex.stl.trex_stl_client", + "import trex.stl.trex_stl_streams", + "import trex.stl.trex_stl_packet_builder_scapy", + "from scapy.layers.l2 import Ether", + "from scapy.layers.inet import IP", + "from scapy.packet import Raw", + ] + self._shell.send_command("\n".join(imports)) + + stateless_client = [ + f"{self.stl_client_name} = trex.stl.trex_stl_client.STLClient(", + f"username='{self._tg_node_config.user}',", + "server='127.0.0.1',", + ")", + ] + + self._shell.send_command(f"\n{self._python_indentation}".join(stateless_client)) + self._shell.send_command(f"{self.stl_client_name}.connect()") + + def calculate_traffic_and_stats( + self, + packet: Packet, + duration: float, + send_mpps: int | None = None, + ) -> PerformanceTrafficStats: + """Send packet traffic and acquire associated statistics. + + Overrides + :meth:`~.traffic_generator.PerformanceTrafficGenerator.calculate_traffic_and_stats`. + """ + trex_stats_output = ast.literal_eval(self._generate_traffic(packet, duration, send_mpps)) + stats = TrexPerformanceTrafficStats.parse(str(trex_stats_output)) + stats.frame_size = len(packet) + return stats + + def _generate_traffic( + self, packet: Packet, duration: float, send_mpps: int | None = None + ) -> str: + """Generate traffic using provided packet. + + Uses the provided packet to generate traffic for the provided duration. 
+
+        Args:
+            packet: The packet being used for the performance test.
+            duration: The duration of the test being performed.
+            send_mpps: The millions of packets per second send rate.
+
+        Returns:
+            A string output of statistics provided by the traffic generator.
+        """
+        self._create_packet_stream(packet)
+        self._setup_trex_client()
+
+        stats = self._send_traffic_and_get_stats(duration, send_mpps)
+
+        return stats
+
+    def _setup_trex_client(self) -> None:
+        """Create the TRex client and connect to the server process."""
+        # Prepare TRex client for next performance test.
+        procedure = [
+            f"{self.stl_client_name}.connect()",
+            f"{self.stl_client_name}.reset(ports = [0, 1])",
+            f"{self.stl_client_name}.clear_stats()",
+            f"{self.stl_client_name}.add_streams({self.packet_stream_name}, ports=[0, 1])",
+        ]
+
+        for command in procedure:
+            self._shell.send_command(command)
+
+    def _create_packet_stream(self, packet: Packet) -> None:
+        """Create TRex packet stream with the given packet.
+
+        Args:
+            packet: The packet being used for the performance test.
+        """
+        # Create the tx packet on the TG shell
+        self._shell.send_command(f"packet={packet.command()}")
+
+        packet_stream = [
+            f"{self.packet_stream_name} = trex.stl.trex_stl_streams.STLStream(",
+            f"name='Test_{len(packet)}_bytes',",
+            "packet=trex.stl.trex_stl_packet_builder_scapy.STLPktBuilder(pkt=packet),",
+            f"mode=trex.stl.trex_stl_streams.{self._streaming_mode}(percentage=100),",
+            ")",
+        ]
+        self._shell.send_command("\n".join(packet_stream))
+
+    def _send_traffic_and_get_stats(self, duration: float, send_mpps: float | None = None) -> str:
+        """Send traffic and get TG Rx stats.
+
+        Sends traffic from the TRex client's ports for the given duration.
+        When the traffic sending duration has passed, collect the aggregate
+        statistics and return TRex's global stats as a string.
+
+        Args:
+            duration: The traffic generation duration.
+            send_mpps: The millions of packets per second for TRex to send from each port.
+
+        Returns:
+            TRex's global statistics as a string.
+        """
+        if send_mpps:
+            self._shell.send_command(f"""{self.stl_client_name}.start(ports=[0, 1],
+                mult = '{send_mpps}mpps',
+                duration = {duration})""")
+        else:
+            self._shell.send_command(f"""{self.stl_client_name}.start(ports=[0, 1],
+                mult = '100%',
+                duration = {duration})""")
+
+        time.sleep(duration)
+
+        stats = self._shell.send_command(
+            f"{self.stl_client_name}.get_stats(ports=[0, 1])", skip_first_line=True
+        )
+
+        self._shell.send_command(f"{self.stl_client_name}.stop(ports=[0, 1])")
+
+        return stats
+
+    def close(self) -> None:
+        """Overrides :meth:`.traffic_generator.TrafficGenerator.close`.
+
+        Stops the TRex application and the client shell.
+        """
+        self._trex_app.close()
+        self._shell.close()

From d77d7f04f24c24a1199af2a7a5a8585fd8bb5bdb Mon Sep 17 00:00:00 2001
From: Nicholas Pratte
Date: Wed, 12 Nov 2025 20:27:32 -0500
Subject: [PATCH 87/99] dts: add single-core performance test suite

Provide a packet transmission function to support performance tests
using a user-supplied performance traffic generator.

The single core performance test is included. It allows the user to
define a matrix of frame size, descriptor count, and expected mpps,
and fails if any combination does not forward an mpps count within 5%
of the given baseline.

It also ensures that the DPDK build on the SUT includes the correct
arguments to allow for the highest possible packet throughput.
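For example, a test case might drive the new transmission helper like
this (a minimal sketch; the packet layout and the 30-second duration
are illustrative values only):

```
from api.packet import assess_performance_by_packet
from scapy.layers.inet import IP
from scapy.layers.l2 import Ether
from scapy.packet import Raw

# Build a 64-byte frame: 14B Ether + 20B IP + 30B payload.
packet = Ether() / IP() / Raw(load="x" * 30)

# Send through the configured performance traffic generator for
# 30 seconds and read back the aggregate statistics.
stats = assess_performance_by_packet(packet=packet, duration=30)
rx_mpps = stats.rx_pps / 1_000_000
```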
Bugzilla ID: 1697

Signed-off-by: Nicholas Pratte
Signed-off-by: Patrick Robb
Reviewed-by: Dean Marx
Reviewed-by: Andrew Bailey
Tested-by: Dean Marx
---
 ...sts.TestSuite_single_core_forward_perf.rst |   8 +
 dts/api/packet.py                             |  35 +++-
 dts/api/test.py                               |  32 ++++
 dts/configurations/tests_config.example.yaml  |  12 ++
 dts/framework/remote_session/dpdk.py          |  15 +-
 .../TestSuite_single_core_forward_perf.py     | 149 ++++++++++++++++++
 6 files changed, 249 insertions(+), 2 deletions(-)
 create mode 100644 doc/api/dts/tests.TestSuite_single_core_forward_perf.rst
 create mode 100644 dts/tests/TestSuite_single_core_forward_perf.py

diff --git a/doc/api/dts/tests.TestSuite_single_core_forward_perf.rst b/doc/api/dts/tests.TestSuite_single_core_forward_perf.rst
new file mode 100644
index 00000000000..3651b0b0412
--- /dev/null
+++ b/doc/api/dts/tests.TestSuite_single_core_forward_perf.rst
@@ -0,0 +1,8 @@
+.. SPDX-License-Identifier: BSD-3-Clause
+
+single_core_forward_perf Test Suite
+===================================
+
+.. automodule:: tests.TestSuite_single_core_forward_perf
+   :members:
+   :show-inheritance:
diff --git a/dts/api/packet.py b/dts/api/packet.py
index ac7f64dd179..094a1b7a9db 100644
--- a/dts/api/packet.py
+++ b/dts/api/packet.py
@@ -33,6 +33,9 @@
 from framework.testbed_model.traffic_generator.capturing_traffic_generator import (
     PacketFilteringConfig,
 )
+from framework.testbed_model.traffic_generator.performance_traffic_generator import (
+    PerformanceTrafficStats,
+)
 from framework.utils import get_packet_summaries
 
 
@@ -108,7 +111,9 @@ def send_packets(
         packets: Packets to send.
     """
     packets = adjust_addresses(packets)
-    get_ctx().func_tg.send_packets(packets, get_ctx().topology.tg_port_egress)
+    tg = get_ctx().func_tg
+    if tg:
+        tg.send_packets(packets, get_ctx().topology.tg_port_egress)
 
 
 def get_expected_packets(
@@ -317,3 +322,31 @@ def _verify_l3_packet(received_packet: IP, expected_packet: IP) -> bool:
     if received_packet.src != expected_packet.src or received_packet.dst != expected_packet.dst:
         return False
     return True
+
+
+def assess_performance_by_packet(
+    packet: Packet, duration: float, send_mpps: int | None = None
+) -> PerformanceTrafficStats:
+    """Send a given packet for a given duration and assess basic performance statistics.
+
+    Send `packet` and assess NIC performance for a given duration, corresponding to the test
+    suite's given topology.
+
+    Args:
+        packet: The packet to send.
+        duration: Performance test duration (in seconds).
+        send_mpps: The millions of packets per second send rate.
+
+    Returns:
+        Performance statistics of the generated test.
+    """
+    from framework.testbed_model.traffic_generator.performance_traffic_generator import (
+        PerformanceTrafficGenerator,
+    )
+
+    assert isinstance(
+        get_ctx().perf_tg, PerformanceTrafficGenerator
+    ), "Cannot send performance traffic with non-performance traffic generator"
+    tg: PerformanceTrafficGenerator = cast(PerformanceTrafficGenerator, get_ctx().perf_tg)
+    # TODO: implement @requires for types of traffic generator
+    return tg.calculate_traffic_and_stats(packet, duration, send_mpps)
diff --git a/dts/api/test.py b/dts/api/test.py
index f58c82715de..e17babe0ca1 100644
--- a/dts/api/test.py
+++ b/dts/api/test.py
@@ -6,6 +6,10 @@
 This module provides utility functions for test cases, including logging,
 verification.
 """
+import json
+from datetime import datetime
+
+from api.artifact import Artifact
 from framework.context import get_ctx
 from framework.exception import InternalError, SkippedTestException, TestCaseVerifyError
 from framework.logger import DTSLogger
@@ -124,3 +128,31 @@ def get_logger() -> DTSLogger:
     if current_test_suite is None:
         raise InternalError("No current test suite")
     return current_test_suite._logger
+
+
+def write_performance_json(
+    performance_data: dict, filename: str = "performance_metrics.json"
+) -> None:
+    """Write performance test results to a JSON file in the test suite's output directory.
+
+    This function creates a JSON file containing performance metrics in the test suite's
+    output directory. The data can be a dictionary of any structure. No specific format
+    is required.
+
+    Args:
+        performance_data: Dictionary containing performance metrics and results.
+        filename: Name of the JSON file to create.
+
+    Raises:
+        InternalError: If performance data is not provided.
+    """
+    if not performance_data:
+        raise InternalError("No performance data to write")
+
+    perf_data = {"timestamp": datetime.now().isoformat(), **performance_data}
+    perf_json_artifact = Artifact("local", filename)
+
+    with perf_json_artifact.open("w") as json_file:
+        json.dump(perf_data, json_file, indent=2)
+
+    get_logger().info(f"Performance results written to: {perf_json_artifact.local_path}")
diff --git a/dts/configurations/tests_config.example.yaml b/dts/configurations/tests_config.example.yaml
index c011ac05886..fede1fda808 100644
--- a/dts/configurations/tests_config.example.yaml
+++ b/dts/configurations/tests_config.example.yaml
@@ -3,3 +3,15 @@
 # Define the custom test suite configurations
 hello_world:
   msg: A custom hello world to you!
+# single_core_forward_perf:
+#   test_parameters: # Add frame size / descriptor count combinations as needed
+#     - frame_size: 64
+#       num_descriptors: 512
+#       expected_mpps: 1.0 # Set millions of packets per second according to your device's expected throughput for this given frame size / descriptor count
+#     - frame_size: 64
+#       num_descriptors: 1024
+#       expected_mpps: 1.0
+#     - frame_size: 512
+#       num_descriptors: 1024
+#       expected_mpps: 1.0
+#   delta_tolerance: 0.05
\ No newline at end of file
diff --git a/dts/framework/remote_session/dpdk.py b/dts/framework/remote_session/dpdk.py
index c6e8c1b2ec4..c3575cfcaf0 100644
--- a/dts/framework/remote_session/dpdk.py
+++ b/dts/framework/remote_session/dpdk.py
@@ -24,6 +24,7 @@
     RemoteDPDKTarballLocation,
     RemoteDPDKTreeLocation,
 )
+from framework.context import get_ctx
 from framework.exception import ConfigurationError, RemoteFileNotFoundError
 from framework.logger import DTSLogger, get_dts_logger
 from framework.params.eal import EalParams
@@ -259,9 +260,21 @@ def _build_dpdk(self) -> None:
         Uses the already configured DPDK build configuration.
         Assumes that the `remote_dpdk_tree_path` has already been set on the SUT node.
         """
+        ctx = get_ctx()
+        # If the SUT is an ice driver device, make sure to build with 16B descriptors.
+        if (
+            ctx.topology.sut_port_ingress
+            and ctx.topology.sut_port_ingress.config.os_driver == "ice"
+        ):
+            meson_args = MesonArgs(
+                default_library="static", libdir="lib", c_args="-DRTE_NET_INTEL_USE_16BYTE_DESC"
+            )
+        else:
+            meson_args = MesonArgs(default_library="static", libdir="lib")
+
         self._session.build_dpdk(
             self._env_vars,
-            MesonArgs(default_library="static", libdir="lib"),
+            meson_args,
             self.remote_dpdk_tree_path,
             self.remote_dpdk_build_dir,
         )
diff --git a/dts/tests/TestSuite_single_core_forward_perf.py b/dts/tests/TestSuite_single_core_forward_perf.py
new file mode 100644
index 00000000000..8a92ba39b56
--- /dev/null
+++ b/dts/tests/TestSuite_single_core_forward_perf.py
@@ -0,0 +1,149 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2025 University of New Hampshire
+
+"""Single core forwarding performance test suite.
+
+This suite measures the number of packets which can be forwarded by DPDK using a single core.
+The test suite takes in a list of test parameters, each consisting of a frame size,
+Tx/Rx descriptor count, and the expected MPPS to be forwarded by the DPDK application. The
+test leverages a performance traffic generator to send traffic at two paired TestPMD interfaces
+on the SUT system, which forward to one another and then back to the traffic generator's ports.
+The aggregate packets forwarded by the two TestPMD ports are compared against the expected MPPS
+baseline which is given in the test config, in order to determine the test result.
+"""
+
+from scapy.layers.inet import IP
+from scapy.layers.l2 import Ether
+from scapy.packet import Raw
+
+from api.capabilities import (
+    LinkTopology,
+    requires_link_topology,
+)
+from api.packet import assess_performance_by_packet
+from api.test import verify, write_performance_json
+from api.testpmd import TestPmd
+from api.testpmd.config import RXRingParams, TXRingParams
+from framework.params.types import TestPmdParamsDict
+from framework.test_suite import BaseConfig, TestSuite, perf_test
+
+
+class Config(BaseConfig):
+    """Performance test metrics."""
+
+    test_parameters: list[dict[str, int | float]] = [
+        {"frame_size": 64, "num_descriptors": 1024, "expected_mpps": 1.00},
+        {"frame_size": 128, "num_descriptors": 1024, "expected_mpps": 1.00},
+        {"frame_size": 256, "num_descriptors": 1024, "expected_mpps": 1.00},
+        {"frame_size": 512, "num_descriptors": 1024, "expected_mpps": 1.00},
+        {"frame_size": 1024, "num_descriptors": 1024, "expected_mpps": 1.00},
+        {"frame_size": 1518, "num_descriptors": 1024, "expected_mpps": 1.00},
+    ]
+    delta_tolerance: float = 0.05
+
+
+@requires_link_topology(LinkTopology.TWO_LINKS)
+class TestSingleCoreForwardPerf(TestSuite):
+    """Single core forwarding performance test suite."""
+
+    config: Config
+
+    def set_up_suite(self):
+        """Set up the test suite."""
+        self.test_parameters = self.config.test_parameters
+        self.delta_tolerance = self.config.delta_tolerance
+
+    def _transmit(self, testpmd: TestPmd, frame_size: int) -> float:
+        """Transmit packets of the given frame size and measure the forwarding rate.
+
+        Args:
+            testpmd: The testpmd shell to use for forwarding packets.
+            frame_size: The size of the frame to transmit.
+
+        Returns:
+            The MPPS (millions of packets per second) forwarded by the SUT.
+        """
+        # Build packet with dummy values, and account for the 14B and 20B Ether and IP headers
+        packet = (
+            Ether(src="52:00:00:00:00:00")
+            / IP(src="1.2.3.4", dst="192.18.1.0")
+            / Raw(load="x" * (frame_size - 14 - 20))
+        )
+
+        testpmd.start()
+
+        # Transmit for 30 seconds.
+        stats = assess_performance_by_packet(packet=packet, duration=30)
+
+        rx_mpps = stats.rx_pps / 1_000_000
+
+        return rx_mpps
+
+    def _produce_stats_table(self, test_parameters: list[dict[str, int | float]]) -> None:
+        """Display performance results in table format and write to structured JSON file.
+
+        Args:
+            test_parameters: The expected and real stats per set of test parameters.
+        """
+        header = f"{'Frame Size':>12} | {'TXD/RXD':>12} | {'Real MPPS':>12} | {'Expected MPPS':>14}"
+        print("-" * len(header))
+        print(header)
+        print("-" * len(header))
+        for params in test_parameters:
+            print(f"{params['frame_size']:>12} | {params['num_descriptors']:>12} | ", end="")
+            print(f"{params['measured_mpps']:>12.2f} | {params['expected_mpps']:>14.2f}")
+        print("-" * len(header))
+
+        write_performance_json({"results": test_parameters})
+
+    @perf_test
+    def single_core_forward_perf(self) -> None:
+        """Validate expected single core forwarding performance.
+
+        Steps:
+            * Create a packet according to the frame size specified in the test config.
+            * Transmit from the traffic generator's ports 0 and 1 at above the expected rate.
+            * Forward on TestPMD's interfaces 0 and 1 with 1 core.
+
+        Verify:
+            * The resulting MPPS forwarded is greater than expected_mpps*(1-delta_tolerance).
+        """
+        # Find SUT DPDK driver to determine driver specific performance optimization flags
+        sut_dpdk_driver = self._ctx.sut_node.config.ports[0].os_driver_for_dpdk
+
+        for params in self.test_parameters:
+            frame_size = params["frame_size"]
+            num_descriptors = params["num_descriptors"]
+
+            driver_specific_testpmd_args: TestPmdParamsDict = {
+                "tx_ring": TXRingParams(descriptors=num_descriptors),
+                "rx_ring": RXRingParams(descriptors=num_descriptors),
+                "nb_cores": 1,
+            }
+
+            if sut_dpdk_driver == "mlx5_core":
+                driver_specific_testpmd_args["burst"] = 64
+                driver_specific_testpmd_args["mbcache"] = 512
+            elif sut_dpdk_driver == "i40e":
+                driver_specific_testpmd_args["rx_queues"] = 2
+                driver_specific_testpmd_args["tx_queues"] = 2
+
+            with TestPmd(
+                **driver_specific_testpmd_args,
+            ) as testpmd:
+                params["measured_mpps"] = self._transmit(testpmd, frame_size)
+                params["performance_delta"] = (
+                    float(params["measured_mpps"]) - float(params["expected_mpps"])
+                ) / float(params["expected_mpps"])
+                params["pass"] = float(params["performance_delta"]) >= -self.delta_tolerance
+
+        self._produce_stats_table(self.test_parameters)
+
+        for params in self.test_parameters:
+            verify(
+                params["pass"] is True,
+                f"""Packets forwarded are less than {(1 - self.delta_tolerance) * 100}%
+                of the expected baseline.
+                Measured MPPS = {params["measured_mpps"]}
+                Expected MPPS = {params["expected_mpps"]}""",
+            )

From cfd851f264d81db6b48a0e6ea07099704c7de555 Mon Sep 17 00:00:00 2001
From: Dean Marx
Date: Fri, 14 Nov 2025 11:40:02 -0500
Subject: [PATCH 88/99] dts: add QinQ strip and VLAN extend to testpmd shell

Add QinQ strip and VLAN extend methods to TestPmdShell class.
Signed-off-by: Dean Marx
---
 dts/api/testpmd/__init__.py | 52 +++++++++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)

diff --git a/dts/api/testpmd/__init__.py b/dts/api/testpmd/__init__.py
index 0891b9edbc2..34e6b09c357 100644
--- a/dts/api/testpmd/__init__.py
+++ b/dts/api/testpmd/__init__.py
@@ -853,6 +853,58 @@ def set_vlan_filter(self, port: int, enable: bool, verify: bool = True) -> None:
                 filter on port {port}"""
             )
 
+    def set_vlan_extend(self, port: int, enable: bool, verify: bool = True) -> None:
+        """Set VLAN extend.
+
+        Args:
+            port: The port number to enable VLAN extend on.
+            enable: Enable extend on `port` if :data:`True`, otherwise disable it.
+            verify: If :data:`True`, the output of the command and show port info
+                is scanned to verify that VLAN extend was set successfully.
+
+        Raises:
+            InteractiveCommandExecutionError: If `verify` is :data:`True` and extend
+                fails to update.
+        """
+        extend_cmd_output = self.send_command(f"vlan set extend {'on' if enable else 'off'} {port}")
+        if verify:
+            vlan_settings = self.show_port_info(port_id=port).vlan_offload
+            if enable ^ (vlan_settings is not None and VLANOffloadFlag.EXTEND in vlan_settings):
+                self._logger.debug(
+                    f"""Failed to {"enable" if enable else "disable"}
+                    extend on port {port}: \n{extend_cmd_output}"""
+                )
+                raise InteractiveCommandExecutionError(
+                    f"""Failed to {"enable" if enable else "disable"} extend on port {port}"""
+                )
+
+    def set_qinq_strip(self, port: int, enable: bool, verify: bool = True) -> None:
+        """Set QinQ strip.
+
+        Args:
+            port: The port number to enable QinQ strip on.
+            enable: Enable stripping on `port` if :data:`True`, otherwise disable it.
+            verify: If :data:`True`, the output of the command and show port info
+                is scanned to verify that QinQ strip was set successfully.
+
+        Raises:
+            InteractiveCommandExecutionError: If `verify` is :data:`True` and QinQ strip
+                fails to update.
+        """
+        qinq_cmd_output = self.send_command(
+            f"vlan set qinq_strip {'on' if enable else 'off'} {port}"
+        )
+        if verify:
+            vlan_settings = self.show_port_info(port_id=port).vlan_offload
+            if enable ^ (vlan_settings is not None and VLANOffloadFlag.QINQ_STRIP in vlan_settings):
+                self._logger.debug(
+                    f"Failed to {'enable' if enable else 'disable'} "
+                    f"QinQ strip on port {port}: \n{qinq_cmd_output}"
+                )
+                raise InteractiveCommandExecutionError(
+                    f"Failed to {'enable' if enable else 'disable'} QinQ strip on port {port}"
+                )
+
     def set_mac_address(self, port: int, mac_address: str, verify: bool = True) -> None:
         """Set port's MAC address.

From 4f69c54bbb2b9d3c925ca987fcc5343bc6ebcfed Mon Sep 17 00:00:00 2001
From: Dean Marx
Date: Fri, 14 Nov 2025 11:40:03 -0500
Subject: [PATCH 89/99] dts: add QinQ test suite

Add QinQ test suite, which verifies PMD behavior when sending QinQ
(IEEE 802.1ad) packets.

Signed-off-by: Dean Marx
Reviewed-by: Patrick Robb
---
 dts/tests/TestSuite_qinq.py | 206 ++++++++++++++++++++++++++++++++++++
 1 file changed, 206 insertions(+)
 create mode 100644 dts/tests/TestSuite_qinq.py

diff --git a/dts/tests/TestSuite_qinq.py b/dts/tests/TestSuite_qinq.py
new file mode 100644
index 00000000000..bb385a0d91d
--- /dev/null
+++ b/dts/tests/TestSuite_qinq.py
@@ -0,0 +1,206 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2025 University of New Hampshire
+
+"""QinQ (802.1ad) Test Suite.
+ +This test suite verifies the correctness and capability of DPDK Poll Mode Drivers (PMDs) +in handling QinQ-tagged Ethernet frames, which contain a pair of stacked VLAN headers +(outer S-VLAN and inner C-VLAN). These tests ensure that both software and hardware offloads +related to QinQ behave as expected across different NIC vendors and PMD implementations. +""" + +from typing import Optional + +from scapy.layers.inet import IP, UDP +from scapy.layers.l2 import Dot1AD, Dot1Q, Ether +from scapy.packet import Packet, Raw + +from api.capabilities import NicCapability, requires_nic_capability +from api.packet import send_packet_and_capture +from api.test import log, verify +from api.testpmd import TestPmd +from framework.test_suite import TestSuite, func_test + + +class TestQinq(TestSuite): + """QinQ test suite. + + This suite consists of 2 test cases: + 1. QinQ Forwarding: Send a QinQ packet and verify the received packet contains + both QinQ/VLAN layers. + 2. QinQ Strip: Enable VLAN/QinQ stripping and verify sent packets are received with the + expected VLAN/QinQ layers. + """ + + def _send_packet_and_verify( + self, packet: Packet, testpmd: TestPmd, should_receive: bool + ) -> None: + """Send packet and verify reception. + + Args: + packet: The packet to send to testpmd. + testpmd: The testpmd session to send commands to. + should_receive: If :data:`True`, verifies packet was received. + """ + testpmd.start() + packets = send_packet_and_capture(packet=packet) + test_packet = self._get_relevant_packet(packets) + if should_receive: + verify(test_packet is not None, "Packet was dropped when it should have been received.") + else: + verify(test_packet is None, "Packet was received when it should have been dropped.") + + def _strip_verify(self, packet: Optional[Packet], expects_tag: bool, context: str) -> bool: + """Helper method for verifying packet stripping functionality. + + Returns: :data:`True` if tags are stripped or not stripped accordingly, + otherwise :data:`False` + """ + if packet is None: + log(f"{context} packet was dropped when it should have been received.") + return False + + if not expects_tag: + if packet.haslayer(Dot1Q) or packet.haslayer(Dot1AD): + log( + f"VLAN tags found in packet when should have been stripped: " + f"{packet.summary()}\tsent packet: {context}", + ) + return False + + if expects_tag: + if vlan_layer := packet.getlayer(Dot1Q): + if vlan_layer.vlan != 200: + log( + f"Expected VLAN ID 200 but found ID {vlan_layer.vlan}: " + f"{packet.summary()}\tsent packet: {context}", + ) + return False + else: + log( + f"Expected 0x8100 VLAN tag but none found: {packet.summary()}" + f"\tsent packet: {context}" + ) + return False + + return True + + def _get_relevant_packet(self, packet_list: list[Packet]) -> Optional[Packet]: + """Helper method for checking received packet list for sent packet.""" + for packet in packet_list: + if hasattr(packet, "load") and b"xxxxx" in packet.load: + return packet + return None + + @func_test + def test_qinq_forwarding(self) -> None: + """QinQ Rx filter test case. + + Steps: + Launch testpmd with mac forwarding mode. + Disable VLAN filter mode on port 0. + Send test packet and capture verbose output. + + Verify: + Check that the received packet has two separate VLAN layers in proper QinQ fashion. + Check that the received packet outer and inner VLAN layer has the appropriate ID. 
+ """ + test_packet = ( + Ether(dst="ff:ff:ff:ff:ff:ff") + / Dot1AD(vlan=100) + / Dot1Q(vlan=200) + / IP(dst="1.2.3.4") + / UDP(dport=1234, sport=4321) + / Raw(load="xxxxx") + ) + with TestPmd() as testpmd: + testpmd.set_vlan_filter(0, False) + testpmd.start() + received_packets = send_packet_and_capture(test_packet) + packet = self._get_relevant_packet(received_packets) + + verify(packet is not None, "Packet was dropped when it should have been received.") + + if packet is not None: + verify( + bool(packet.haslayer(Dot1AD)) and bool(packet.haslayer(Dot1Q)), + "QinQ/VLAN layers not found in packet", + ) + + if outer_vlan := packet.getlayer(Dot1AD): + outer_vlan_id = outer_vlan.vlan + verify( + outer_vlan_id == 100, + f"Outer VLAN ID was {outer_vlan_id} when it should have been 100.", + ) + else: + verify(False, "VLAN layer not found in received packet.") + + if outer_vlan and (inner_vlan := outer_vlan.getlayer(Dot1Q)): + inner_vlan_id = inner_vlan.vlan + verify( + inner_vlan_id == 200, + f"Inner VLAN ID was {inner_vlan_id} when it should have been 200", + ) + + @requires_nic_capability(NicCapability.PORT_RX_OFFLOAD_QINQ_STRIP) + @func_test + def test_qinq_strip(self) -> None: + """Test combinations of VLAN/QinQ strip settings with various QinQ packets. + + Steps: + Launch testpmd with QinQ and VLAN strip enabled. + Send four VLAN/QinQ related test packets. + + Verify: + Check received packets have the expected VLAN/QinQ layers/tags. + """ + test_packets = [ + Ether() / Dot1Q() / IP() / UDP(dport=1234, sport=4321) / Raw(load="xxxxx"), + Ether() + / Dot1Q(vlan=100) + / Dot1Q(vlan=200) + / IP() + / UDP(dport=1234, sport=4321) + / Raw(load="xxxxx"), + Ether() / Dot1AD() / IP() / UDP(dport=1234, sport=4321) / Raw(load="xxxxx"), + Ether() / Dot1AD() / Dot1Q() / IP() / UDP(dport=1234, sport=4321) / Raw(load="xxxxx"), + ] + with TestPmd() as testpmd: + testpmd.set_qinq_strip(0, True) + testpmd.set_vlan_strip(0, True) + testpmd.start() + + received_packets1 = send_packet_and_capture(test_packets[0]) + vlan_packet = self._get_relevant_packet(received_packets1) + received_packets2 = send_packet_and_capture(test_packets[1]) + double_vlan_packet = self._get_relevant_packet(received_packets2) + received_packets3 = send_packet_and_capture(test_packets[2]) + single_88a8_packet = self._get_relevant_packet(received_packets3) + received_packets4 = send_packet_and_capture(test_packets[3]) + qinq_packet = self._get_relevant_packet(received_packets4) + + testpmd.stop() + + tests = [ + ("Single 8100 tag", self._strip_verify(vlan_packet, False, "Single 8100 tag")), + ( + "Double 8100 tag", + self._strip_verify(double_vlan_packet, True, "Double 8100 tag"), + ), + ( + "Single 88a8 tag", + self._strip_verify(single_88a8_packet, False, "Single 88a8 tag"), + ), + ( + "QinQ (88a8 and 8100 tags)", + self._strip_verify(qinq_packet, False, "QinQ (88a8 and 8100 tags)"), + ), + ] + + failed = [ctx for ctx, result in tests if not result] + + verify( + not failed, + f"The following packets were not stripped correctly: {', '.join(failed)}", + ) From 8e1bdc67ee427dad220a23d47f62dea4fe31d3cf Mon Sep 17 00:00:00 2001 From: Dean Marx Date: Fri, 14 Nov 2025 13:59:44 -0500 Subject: [PATCH 90/99] doc: update DTS guide about API module Rewrite the section of dts.rst outlining use of the DTS framework in writing test suites to properly reflect the current state of the DTS API and how to use it. 
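For example, a test case following the recommended pattern (a minimal
sketch; the packet contents are illustrative values):

```
from scapy.layers.l2 import Ether

from api.packet import send_packet_and_capture
from api.test import verify
from api.testpmd import TestPmd

with TestPmd() as testpmd:
    testpmd.start()
    # Send through the api module helper instead of importing the
    # traffic generator class from the framework directly.
    received = send_packet_and_capture(Ether())
    verify(len(received) > 0, "Packet was not received.")
```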
Signed-off-by: Dean Marx --- doc/guides/tools/dts.rst | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/doc/guides/tools/dts.rst b/doc/guides/tools/dts.rst index 46c849a7aa5..5b9a3480164 100644 --- a/doc/guides/tools/dts.rst +++ b/doc/guides/tools/dts.rst @@ -511,20 +511,19 @@ Leveraging the DTS framework in writing testsuites One should avoid directly importing DTS framework code to their testsuites where possible. Instead, for performing common processes required in testsuites, -one should use (or add to) the list of methods provided in the ``Testsuite`` class -(the base class of all testsuites). +one should use (or add to) the list of methods provided in the ``api`` module. For instance, for sending a list of packets, one should work through the packet transmitting function -already made available in the ``TestSuite`` class, +already made available in the ``api`` module, instead of directly importing the DTS traffic generator class and using that class in one's testsuite implementation. It is also acceptable to import and instantiate classes for various DPDK applications. For instance, writing a testsuite for a simple packet forwarding operation -would involve importing the DTS ``TestPmd`` class, +would involve importing the DTS ``TestPmd`` class from the ``api`` module, instantiating ``TestPmd``, calling ``TestPmd``'s ``start()`` method, and then sending traffic via one of the traffic transmitting functions -exposed in the ``Testsuite`` class. +exposed in the ``api`` module. Test Case Verification ~~~~~~~~~~~~~~~~~~~~~~ From d24b5031406682576c6c2fbb868d20d5dd4c1c4d Mon Sep 17 00:00:00 2001 From: Patrick Robb Date: Thu, 13 Nov 2025 16:56:17 -0500 Subject: [PATCH 91/99] doc: ignore Pydantic for DTS autodoc Mock the Pydantic import so that even when Pydantic is available on the system, it is not loaded by Sphinx, ensuring we perform the doc build without Pydantic regardless of the environment. Signed-off-by: Patrick Robb --- doc/guides/conf.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/guides/conf.py b/doc/guides/conf.py index 2c1aa81bbff..5f0a602741c 100644 --- a/doc/guides/conf.py +++ b/doc/guides/conf.py @@ -106,6 +106,9 @@ # fabric.Connection (without) vs. fabric.connection.Connection (with) autodoc_mock_imports = importlib.import_module('check-dts-requirements').get_missing_imports() + # Always mock Pydantic to avoid autodoc introspecting its internals. + autodoc_mock_imports = list(set(autodoc_mock_imports + ['pydantic', 'pydantic_core'])) + # ####### :numref: fallback ######## # The following hook functions add some simple handling for the :numref: From 2c82b4f6b947867eab940b39d00c4b18f2f1c761 Mon Sep 17 00:00:00 2001 From: Thomas Monjalon Date: Sun, 2 Nov 2025 17:14:10 +0100 Subject: [PATCH 92/99] maintainers: remove some inactives Some maintainers known to be inactive for at least a year are removed to make clearer where help is needed. 
Signed-off-by: Thomas Monjalon --- MAINTAINERS | 4 ---- 1 file changed, 4 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 7fa01419cf9..27a4981b10d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -72,7 +72,6 @@ T: git://dpdk.org/next/dpdk-next-dts Stable Branches M: Luca Boccassi M: Kevin Traynor -M: Christian Ehrhardt M: Xueming Li T: git://dpdk.org/dpdk-stable @@ -139,7 +138,6 @@ F: devtools/check-meson.py Public CI M: Aaron Conole -M: Michael Santana F: .github/workflows/build.yml F: .ci/ @@ -160,7 +158,6 @@ Environment Abstraction Layer T: git://dpdk.org/dpdk EAL API and common code -M: Tyler Retzlaff F: lib/eal/common/ F: lib/eal/unix/ F: lib/eal/include/ @@ -392,7 +389,6 @@ F: kernel/freebsd/nic_uio/ Windows support M: Dmitry Kozlyuk -M: Tyler Retzlaff F: lib/eal/windows/ F: doc/guides/windows_gsg/ From 22fc97ba1d67f18d10e29ace2f19f8c0d52534e3 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 12 Nov 2025 12:05:09 -0800 Subject: [PATCH 93/99] pdump: fix integer type for bursts There are warnings from Coverity and other tools that handling of intermediate bursts may wraparound. Shouldn't be possible but use unsigned int to avoid any issues, and just as fast. Coverity issue: 499471 Fixes: 0dea03ef2e8c ("pdump: remove use of VLA") Signed-off-by: Stephen Hemminger --- lib/pdump/rte_pdump.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/lib/pdump/rte_pdump.c b/lib/pdump/rte_pdump.c index 039a4013ddd..c3d0ffa779e 100644 --- a/lib/pdump/rte_pdump.c +++ b/lib/pdump/rte_pdump.c @@ -27,7 +27,7 @@ RTE_LOG_REGISTER_DEFAULT(pdump_logtype, NOTICE); /* Used for the multi-process communication */ #define PDUMP_MP "mp_pdump" -#define PDUMP_BURST_SIZE 32 +#define PDUMP_BURST_SIZE 32u /* Overly generous timeout for secondary to respond */ #define MP_TIMEOUT_S 5 @@ -139,13 +139,11 @@ pdump_cb_release(struct pdump_rxtx_cbs *cbs) static void pdump_copy_burst(uint16_t port_id, uint16_t queue_id, enum rte_pcapng_direction direction, - struct rte_mbuf **pkts, uint16_t nb_pkts, + struct rte_mbuf **pkts, unsigned int nb_pkts, const struct pdump_rxtx_cbs *cbs, struct rte_pdump_stats *stats) { - unsigned int i; - int ring_enq; - uint16_t d_pkts = 0; + unsigned int i, ring_enq, d_pkts = 0; struct rte_mbuf *dup_bufs[PDUMP_BURST_SIZE]; /* duplicated packets */ struct rte_ring *ring; struct rte_mempool *mp; @@ -188,7 +186,7 @@ pdump_copy_burst(uint16_t port_id, uint16_t queue_id, dup_bufs[d_pkts++] = p; } - if (unlikely(d_pkts == 0)) + if (d_pkts == 0) return; rte_atomic_fetch_add_explicit(&stats->accepted, d_pkts, rte_memory_order_relaxed); @@ -210,10 +208,10 @@ pdump_copy(uint16_t port_id, uint16_t queue_id, const struct pdump_rxtx_cbs *cbs, struct rte_pdump_stats *stats) { - uint16_t offs = 0; + unsigned int offs = 0; do { - uint16_t n = RTE_MIN(nb_pkts - offs, PDUMP_BURST_SIZE); + unsigned int n = RTE_MIN(nb_pkts - offs, PDUMP_BURST_SIZE); pdump_copy_burst(port_id, queue_id, direction, &pkts[offs], n, cbs, stats); offs += n; From 928f43e3f9c12bd1e8eacbbc3c63f07896b64d92 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 12 Nov 2025 12:05:10 -0800 Subject: [PATCH 94/99] pdump: fix race in disabling There is a race where the request to disable pdump may get ahead of the handling of pdump requests in dumpcap. The fix is to do local removal of callbacks before forwarding same to secondary. To reproduce: 1. Start testpmd and start traffic 2. Start dumpcap to capture 3. 
Interrupt dumpcap with ^C

Testpmd will show missing response and dumpcap will show error:
    EAL: Cannot find action: mp_pdump

Only reproducible if additional logging not enabled.

Fixes: c3ceb8742295 ("pdump: forward callback enable to secondary process")

Signed-off-by: Stephen Hemminger
---
 lib/pdump/rte_pdump.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/pdump/rte_pdump.c b/lib/pdump/rte_pdump.c
index c3d0ffa779e..ac94efe7ff8 100644
--- a/lib/pdump/rte_pdump.c
+++ b/lib/pdump/rte_pdump.c
@@ -576,12 +576,12 @@ __pdump_request(void *param)
 	PDUMP_LOG_LINE(DEBUG, "primary pdump %s", pdump_opname(req->op));
 
 	ret = set_pdump_rxtx_cbs(req);
-	ret = pdump_send_response(req, ret, bundle->peer);
 
 	/* Primary process is responsible for broadcasting request to all secondaries */
 	if (ret == 0)
 		pdump_request_to_secondary(req);
 
+	pdump_send_response(req, ret, bundle->peer);
 	free(bundle);
 }
 
From cf1e03f881af08234892ab2649fb2953ffff52f5 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger
Date: Tue, 11 Nov 2025 14:55:46 -0800
Subject: [PATCH 95/99] test/bpf: add ELF loading

Create an ELF file to load using clang.
Repackage the object into an array using xxd.
Write a test to load and run the BPF.

If the libelf library is not available, then DPDK bpf will return
-ENOTSUP to the test and the test will be skipped.

Signed-off-by: Stephen Hemminger
Acked-by: Marat Khalili
Tested-by: Marat Khalili
Acked-by: Konstantin Ananyev
---
 app/test/bpf/load.c      |  51 +++++++++++++
 app/test/bpf/meson.build |  52 +++++++++++++
 app/test/meson.build     |   2 +
 app/test/test_bpf.c      | 159 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 264 insertions(+)
 create mode 100644 app/test/bpf/load.c
 create mode 100644 app/test/bpf/meson.build

diff --git a/app/test/bpf/load.c b/app/test/bpf/load.c
new file mode 100644
index 00000000000..a4d3d61d7ac
--- /dev/null
+++ b/app/test/bpf/load.c
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ *
+ * BPF program for testing rte_bpf_elf_load
+ */
+
+typedef unsigned char uint8_t;
+typedef unsigned short uint16_t;
+typedef unsigned int uint32_t;
+typedef unsigned long uint64_t;
+
+/* Match the structures from test_bpf.c */
+struct dummy_offset {
+	uint64_t u64;
+	uint32_t u32;
+	uint16_t u16;
+	uint8_t u8;
+} __attribute__((packed));
+
+struct dummy_vect8 {
+	struct dummy_offset in[8];
+	struct dummy_offset out[8];
+};
+
+/* External function declaration - provided by test via xsym */
+extern void dummy_func1(const void *p, uint32_t *v32, uint64_t *v64);
+
+/*
+ * Test BPF function that will be loaded from ELF.
+ * This function is a compiled version of the code used in test_call1.
+ */
+__attribute__((section("call1"), used))
+uint64_t
+test_call1(struct dummy_vect8 *arg)
+{
+	uint32_t v32;
+	uint64_t v64;
+
+	/* Load input values */
+	v32 = arg->in[0].u32;
+	v64 = arg->in[0].u64;
+
+	/* Call external function */
+	dummy_func1(arg, &v32, &v64);
+
+	/* Store results */
+	arg->out[0].u32 = v32;
+	arg->out[0].u64 = v64;
+
+	v64 += v32;
+	return v64;
+}
diff --git a/app/test/bpf/meson.build b/app/test/bpf/meson.build
new file mode 100644
index 00000000000..ef858a98ead
--- /dev/null
+++ b/app/test/bpf/meson.build
@@ -0,0 +1,52 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2025 Stephen Hemminger
+
+bpf_test_hdrs = [ ]
+
+# use clang to compile to BPF
+clang_supports_bpf = false
+clang = find_program('clang', required: false)
+if clang.found()
+    clang_supports_bpf = run_command(clang, '-target', 'bpf', '--print-supported-cpus',
+                                     check: false).returncode() == 0
+endif
+
+if not clang_supports_bpf + message('app/test_bpf: no BPF load tests missing clang BPF support') + subdir_done() + +endif + +xxd = find_program('xxd', required: false) +if not xxd.found() + message('app/test_bpf: missing xxd required to convert object to array') + subdir_done() +endif + +# BPF compiler flags +bpf_cflags = [ '-O2', '-target', 'bpf', '-g', '-c'] + +# Enable test in test_bpf.c +cflags += '-DTEST_BPF_ELF_LOAD' + +# BPF sources to compile +bpf_progs = { + 'load': 'test_bpf_load', +} + +foreach bpf_src, bpf_hdr: bpf_progs + # Compile BPF C source to object file + bpf_obj = custom_target(bpf_src + '_o', + input: bpf_src + '.c', + output: bpf_src + '.o', + command: [ clang, bpf_cflags, '@INPUT@', '-o', '@OUTPUT@']) + + # Convert object file to C header using xxd + bpf_test_h = custom_target(bpf_src + '_h', + input: bpf_obj, + output: bpf_hdr + '.h', + command: [ xxd, '-i', '@INPUT@', '@OUTPUT@']) + + resources += bpf_test_h + +endforeach diff --git a/app/test/meson.build b/app/test/meson.build index 8df8d3edd1e..efec42a6bf7 100644 --- a/app/test/meson.build +++ b/app/test/meson.build @@ -281,6 +281,8 @@ if not is_windows install: false) endif +subdir('bpf') + subdir('test_cfgfiles') resources += test_cfgfile_h diff --git a/app/test/test_bpf.c b/app/test/test_bpf.c index 90e10d7d2cc..c4600023585 100644 --- a/app/test/test_bpf.c +++ b/app/test/test_bpf.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -14,6 +15,7 @@ #include #include #include + #include "test.h" #if !defined(RTE_LIB_BPF) @@ -3278,6 +3280,163 @@ test_bpf(void) REGISTER_FAST_TEST(bpf_autotest, true, true, test_bpf); +#ifdef TEST_BPF_ELF_LOAD + +/* + * Helper function to write BPF object data to temporary file. + * Returns temp file path on success, NULL on failure. + * Caller must free the returned path and unlink the file. + */ +static char * +create_temp_bpf_file(const uint8_t *data, size_t size, const char *name) +{ + char *tmpfile = NULL; + int fd; + ssize_t written; + + if (asprintf(&tmpfile, "/tmp/dpdk_bpf_%s_XXXXXX.o", name) < 0) { + printf("%s@%d: asprintf failed: %s\n", + __func__, __LINE__, strerror(errno)); + return NULL; + } + + /* Create and open temp file */ + fd = mkstemps(tmpfile, strlen(".o")); + if (fd < 0) { + printf("%s@%d: mkstemps(%s) failed: %s\n", + __func__, __LINE__, tmpfile, strerror(errno)); + free(tmpfile); + return NULL; + } + + /* Write BPF object data */ + written = write(fd, data, size); + close(fd); + + if (written != (ssize_t)size) { + printf("%s@%d: write failed: %s\n", + __func__, __LINE__, strerror(errno)); + unlink(tmpfile); + free(tmpfile); + return NULL; + } + + return tmpfile; +} + +#include "test_bpf_load.h" + +/* + * Test loading BPF program from an object file. + * This test uses same arguments as previous test_call1 example. 
+ */ +static int +test_bpf_elf_load(void) +{ + static const char test_section[] = "call1"; + uint8_t tbuf[sizeof(struct dummy_vect8)]; + const struct rte_bpf_xsym xsym[] = { + { + .name = RTE_STR(dummy_func1), + .type = RTE_BPF_XTYPE_FUNC, + .func = { + .val = (void *)dummy_func1, + .nb_args = 3, + .args = { + [0] = { + .type = RTE_BPF_ARG_PTR, + .size = sizeof(struct dummy_offset), + }, + [1] = { + .type = RTE_BPF_ARG_PTR, + .size = sizeof(uint32_t), + }, + [2] = { + .type = RTE_BPF_ARG_PTR, + .size = sizeof(uint64_t), + }, + }, + }, + }, + }; + int ret; + + /* Create temp file from embedded BPF object */ + char *tmpfile = create_temp_bpf_file(app_test_bpf_load_o, + app_test_bpf_load_o_len, + "load"); + if (tmpfile == NULL) + return -1; + + /* Try to load BPF program from temp file */ + const struct rte_bpf_prm prm = { + .xsym = xsym, + .nb_xsym = RTE_DIM(xsym), + .prog_arg = { + .type = RTE_BPF_ARG_PTR, + .size = sizeof(tbuf), + }, + }; + + struct rte_bpf *bpf = rte_bpf_elf_load(&prm, tmpfile, test_section); + unlink(tmpfile); + free(tmpfile); + + /* If libelf support is not available */ + if (bpf == NULL && rte_errno == ENOTSUP) + return TEST_SKIPPED; + + TEST_ASSERT(bpf != NULL, "failed to load BPF %d:%s", rte_errno, strerror(rte_errno)); + + /* Prepare test data */ + struct dummy_vect8 *dv = (struct dummy_vect8 *)tbuf; + + memset(dv, 0, sizeof(*dv)); + dv->in[0].u64 = (int32_t)TEST_FILL_1; + dv->in[0].u32 = dv->in[0].u64; + dv->in[0].u16 = dv->in[0].u64; + dv->in[0].u8 = dv->in[0].u64; + + /* Execute loaded BPF program */ + uint64_t rc = rte_bpf_exec(bpf, tbuf); + ret = test_call1_check(rc, tbuf); + TEST_ASSERT(ret == 0, "test_call1_check failed: %d", ret); + + /* Test JIT if available */ + struct rte_bpf_jit jit; + ret = rte_bpf_get_jit(bpf, &jit); + TEST_ASSERT(ret == 0, "rte_bpf_get_jit failed: %d", ret); + + if (jit.func != NULL) { + memset(dv, 0, sizeof(*dv)); + dv->in[0].u64 = (int32_t)TEST_FILL_1; + dv->in[0].u32 = dv->in[0].u64; + dv->in[0].u16 = dv->in[0].u64; + dv->in[0].u8 = dv->in[0].u64; + + rc = jit.func(tbuf); + ret = test_call1_check(rc, tbuf); + TEST_ASSERT(ret == 0, "jit test_call1_check failed: %d", ret); + } + + rte_bpf_destroy(bpf); + + printf("%s: ELF load test passed\n", __func__); + return TEST_SUCCESS; +} +#else + +static int +test_bpf_elf_load(void) +{ + printf("BPF compile not supported, skipping test\n"); + return TEST_SKIPPED; +} + +#endif /* !TEST_BPF_ELF_LOAD */ + +REGISTER_FAST_TEST(bpf_elf_load_autotest, true, true, test_bpf_elf_load); + #ifndef RTE_HAS_LIBPCAP static int From 81038845c90b56b991bb26c669a4d56940a56d66 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 11 Nov 2025 14:55:47 -0800 Subject: [PATCH 96/99] test/bpf: add Rx and Tx filtering New test using null device to test filtering with BPF. If libelf library is not available, then DPDK bpf will return -ENOTSUP to the test and the test will be skipped. 
Signed-off-by: Stephen Hemminger Acked-by: Marat Khalili Tested-by: Marat Khalili Acked-by: Konstantin Ananyev --- app/test/bpf/filter.c | 53 +++++++ app/test/bpf/meson.build | 1 + app/test/test_bpf.c | 320 ++++++++++++++++++++++++++++++++++++++- 3 files changed, 372 insertions(+), 2 deletions(-) create mode 100644 app/test/bpf/filter.c diff --git a/app/test/bpf/filter.c b/app/test/bpf/filter.c new file mode 100644 index 00000000000..d47233a47ae --- /dev/null +++ b/app/test/bpf/filter.c @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * BPF TX filter program for testing rte_bpf_eth_tx_elf_load + */ + +typedef unsigned char uint8_t; +typedef unsigned short uint16_t; +typedef unsigned int uint32_t; +typedef unsigned long uint64_t; + +/* + * Simple TX filter that accepts TCP packets + * + * BPF TX programs receive pointer to data and should return: + * 0 = drop packet + * non-zero = rx/tx packet + * + * This filter checks: + * 1. Packet is IPv4 + * 2. Protocol is TCP (IPPROTO_TCP = 6) + */ +__attribute__((section("filter"), used)) +uint64_t +test_filter(void *pkt) +{ + uint8_t *data = pkt; + + /* Read version and IHL (first byte of IP header) */ + uint8_t version_ihl = data[14]; + + /* Check IPv4 version (upper 4 bits should be 4) */ + if ((version_ihl >> 4) != 4) + return 0; + + /* Protocol field (byte 9 of IP header) must be TCP (6) */ + uint8_t proto = data[14 + 9]; + return (proto == 6); +} + +__attribute__((section("drop"), used)) +uint64_t +test_drop(void *pkt) +{ + (void)pkt; + return 0; +} + +__attribute__((section("allow"), used)) +uint64_t +test_allow(void *pkt) +{ + (void)pkt; + return 1; +} diff --git a/app/test/bpf/meson.build b/app/test/bpf/meson.build index ef858a98ead..aaecfa70186 100644 --- a/app/test/bpf/meson.build +++ b/app/test/bpf/meson.build @@ -31,6 +31,7 @@ cflags += '-DTEST_BPF_ELF_LOAD' # BPF sources to compile bpf_progs = { + 'filter': 'test_bpf_filter', 'load': 'test_bpf_load', } diff --git a/app/test/test_bpf.c b/app/test/test_bpf.c index c4600023585..b7c94ba1c70 100644 --- a/app/test/test_bpf.c +++ b/app/test/test_bpf.c @@ -3424,10 +3424,326 @@ test_bpf_elf_load(void) printf("%s: ELF load test passed\n", __func__); return TEST_SUCCESS; } + +#include +#include +#include + +#include "test_bpf_filter.h" + +#define BPF_TEST_BURST 128 +#define BPF_TEST_POOLSIZE 256 /* at least 2x burst */ +#define BPF_TEST_PKT_LEN 64 /* Ether + IP + TCP */ + +static int null_vdev_setup(const char *name, uint16_t *port, struct rte_mempool *pool) +{ + int ret; + + /* Make a null device */ + ret = rte_vdev_init(name, NULL); + TEST_ASSERT(ret == 0, "rte_vdev_init(%s) failed: %d", name, ret); + + ret = rte_eth_dev_get_port_by_name(name, port); + TEST_ASSERT(ret == 0, "failed to get port id for %s: %d", name, ret); + + struct rte_eth_conf conf = { }; + ret = rte_eth_dev_configure(*port, 1, 1, &conf); + TEST_ASSERT(ret == 0, "failed to configure port %u: %d", *port, ret); + + struct rte_eth_txconf txconf = { }; + ret = rte_eth_tx_queue_setup(*port, 0, BPF_TEST_BURST, SOCKET_ID_ANY, &txconf); + TEST_ASSERT(ret == 0, "failed to setup tx queue port %u: %d", *port, ret); + + struct rte_eth_rxconf rxconf = { }; + ret = rte_eth_rx_queue_setup(*port, 0, BPF_TEST_BURST, SOCKET_ID_ANY, + &rxconf, pool); + TEST_ASSERT(ret == 0, "failed to setup rx queue port %u: %d", *port, ret); + + ret = rte_eth_dev_start(*port); + TEST_ASSERT(ret == 0, "failed to start port %u: %d", *port, ret); + + return 0; +} + +static unsigned int +setup_mbufs(struct rte_mbuf *burst[], unsigned int n) +{ + struct 
+	struct rte_ether_hdr eh = {
+		.ether_type = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4),
+	};
+	const struct rte_ipv4_hdr iph = {
+		.version_ihl = RTE_IPV4_VHL_DEF,
+		.total_length = rte_cpu_to_be_16(BPF_TEST_PKT_LEN - sizeof(eh)),
+		.time_to_live = IPDEFTTL,
+		.src_addr = rte_cpu_to_be_32(ip_src_addr),
+		.dst_addr = rte_cpu_to_be_32(ip_dst_addr),
+	};
+	unsigned int tcp_count = 0;
+
+	rte_eth_random_addr(eh.dst_addr.addr_bytes);
+
+	for (unsigned int i = 0; i < n; i++) {
+		struct rte_mbuf *mb = burst[i];
+
+		/* Set packet length */
+		mb->data_len = BPF_TEST_PKT_LEN;
+		mb->pkt_len = BPF_TEST_PKT_LEN;
+
+		/* Setup Ethernet header */
+		*rte_pktmbuf_mtod(mb, struct rte_ether_hdr *) = eh;
+
+		/* Setup IP header */
+		struct rte_ipv4_hdr *ip
+			= rte_pktmbuf_mtod_offset(mb, struct rte_ipv4_hdr *, sizeof(eh));
+		*ip = iph;
+
+		if (rte_rand() & 1) {
+			struct rte_udp_hdr *udp
+				= rte_pktmbuf_mtod_offset(mb, struct rte_udp_hdr *,
+							  sizeof(eh) + sizeof(iph));
+
+			ip->next_proto_id = IPPROTO_UDP;
+			*udp = (struct rte_udp_hdr) {
+				.src_port = rte_cpu_to_be_16(9),	/* discard */
+				.dst_port = rte_cpu_to_be_16(9),	/* discard */
+				.dgram_len = rte_cpu_to_be_16(BPF_TEST_PKT_LEN -
+							      sizeof(eh) - sizeof(iph)),
+			};
+
+		} else {
+			struct rte_tcp_hdr *tcp
+				= rte_pktmbuf_mtod_offset(mb, struct rte_tcp_hdr *,
+							  sizeof(eh) + sizeof(iph));
+
+			ip->next_proto_id = IPPROTO_TCP;
+			*tcp = (struct rte_tcp_hdr) {
+				.src_port = rte_cpu_to_be_16(9),	/* discard */
+				.dst_port = rte_cpu_to_be_16(9),	/* discard */
+				.tcp_flags = RTE_TCP_RST_FLAG,
+			};
+			++tcp_count;
+		}
+	}
+
+	return tcp_count;
+}
+
+static int bpf_tx_test(uint16_t port, const char *tmpfile, struct rte_mempool *pool,
+		       const char *section, uint32_t flags)
+{
+	const struct rte_bpf_prm prm = {
+		.prog_arg = {
+			.type = RTE_BPF_ARG_PTR,
+			.size = sizeof(struct rte_mbuf),
+		},
+	};
+	int ret;
+
+	/* Try to load BPF TX program from temp file */
+	ret = rte_bpf_eth_tx_elf_load(port, 0, &prm, tmpfile, section, flags);
+	if (ret != 0) {
+		printf("%s@%d: failed to load BPF filter from file=%s error=%d:(%s)\n",
+		       __func__, __LINE__, tmpfile, rte_errno, rte_strerror(rte_errno));
+		return ret;
+	}
+
+	struct rte_mbuf *pkts[BPF_TEST_BURST] = { };
+	ret = rte_pktmbuf_alloc_bulk(pool, pkts, BPF_TEST_BURST);
+	TEST_ASSERT(ret == 0, "failed to allocate mbufs");
+
+	uint16_t expect = setup_mbufs(pkts, BPF_TEST_BURST);
+
+	uint16_t sent = rte_eth_tx_burst(port, 0, pkts, BPF_TEST_BURST);
+	TEST_ASSERT_EQUAL(sent, expect, "rte_eth_tx_burst returned: %u expected %u",
+			  sent, expect);
+
+	/* The unsent packets should be dropped */
+	rte_pktmbuf_free_bulk(pkts + sent, BPF_TEST_BURST - sent);
+
+	/* Pool should have same number of packets avail */
+	unsigned int avail = rte_mempool_avail_count(pool);
+	TEST_ASSERT_EQUAL(avail, BPF_TEST_POOLSIZE,
+			  "Mempool available %u != %u leaks?", avail, BPF_TEST_POOLSIZE);
+
+	rte_bpf_eth_tx_unload(port, 0);
+	return TEST_SUCCESS;
+}
+
+/* Test loading a transmit filter which only allows IPv4 TCP packets */
+static int
+test_bpf_elf_tx_load(void)
+{
+	static const char null_dev[] = "net_null_bpf0";
+	char *tmpfile = NULL;
+	struct rte_mempool *mb_pool = NULL;
+	uint16_t port = UINT16_MAX;
+	int ret;
+
+	printf("%s start\n", __func__);
+
+	/* Make a pool for packets */
+	mb_pool = rte_pktmbuf_pool_create("bpf_tx_test_pool", BPF_TEST_POOLSIZE,
+					  0, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
+					  SOCKET_ID_ANY);
+	TEST_ASSERT(mb_pool != NULL, "failed to create mempool");
+
+	ret = null_vdev_setup(null_dev, &port, mb_pool);
+	if (ret != 0)
+		goto fail;
+
+	/* Create temp file from embedded BPF object */
+	tmpfile = create_temp_bpf_file(app_test_bpf_filter_o, app_test_bpf_filter_o_len, "tx");
+	if (tmpfile == NULL) {
+		ret = -1;
+		goto fail;
+	}
+
+	/* Do test with VM */
+	ret = bpf_tx_test(port, tmpfile, mb_pool, "filter", 0);
+	if (ret != 0)
+		goto fail;
+
+	/* Repeat with JIT */
+	ret = bpf_tx_test(port, tmpfile, mb_pool, "filter", RTE_BPF_ETH_F_JIT);
+	if (ret == 0)
+		printf("%s: TX ELF load test passed\n", __func__);
+
+fail:
+	if (tmpfile) {
+		unlink(tmpfile);
+		free(tmpfile);
+	}
+
+	if (port != UINT16_MAX)
+		rte_vdev_uninit(null_dev);
+
+	rte_mempool_free(mb_pool);
+
+	if (ret == 0)
+		return TEST_SUCCESS;
+	else if (ret == -ENOTSUP)
+		return TEST_SKIPPED;
+	else
+		return TEST_FAILED;
+}
+
+/* Helper to load a receive filter and check the expected burst size */
+static int bpf_rx_test(uint16_t port, const char *tmpfile, struct rte_mempool *pool,
+		       const char *section, uint32_t flags, uint16_t expected)
+{
+	struct rte_mbuf *pkts[BPF_TEST_BURST];
+	const struct rte_bpf_prm prm = {
+		.prog_arg = {
+			.type = RTE_BPF_ARG_PTR,
+			.size = sizeof(struct rte_mbuf),
+		},
+	};
+	int ret;
+
+	/* Load BPF program from the given section */
+	ret = rte_bpf_eth_rx_elf_load(port, 0, &prm, tmpfile, section, flags);
+	if (ret != 0) {
+		printf("%s@%d: failed to load BPF filter from file=%s error=%d:(%s)\n",
+		       __func__, __LINE__, tmpfile, rte_errno, rte_strerror(rte_errno));
+		return ret;
+	}
+
+	uint16_t rcvd = rte_eth_rx_burst(port, 0, pkts, BPF_TEST_BURST);
+	TEST_ASSERT_EQUAL(rcvd, expected,
+			  "rte_eth_rx_burst returned: %u expect: %u", rcvd, expected);
+
+	/* Drop the received packets */
+	rte_pktmbuf_free_bulk(pkts, rcvd);
+
+	rte_bpf_eth_rx_unload(port, 0);
+
+	/* Pool should now be full */
+	unsigned int avail = rte_mempool_avail_count(pool);
+	TEST_ASSERT_EQUAL(avail, BPF_TEST_POOLSIZE,
+			  "Mempool available %u != %u leaks?", avail, BPF_TEST_POOLSIZE);
+
+	return TEST_SUCCESS;
+}
+
+/* Test loading receive filters, first with drop-all and then with allow-all */
+static int
+test_bpf_elf_rx_load(void)
+{
+	static const char null_dev[] = "net_null_bpf0";
+	struct rte_mempool *pool = NULL;
+	char *tmpfile = NULL;
+	uint16_t port = UINT16_MAX;
+	int ret;
+
+	printf("%s start\n", __func__);
+
+	/* Make a pool for packets */
+	pool = rte_pktmbuf_pool_create("bpf_rx_test_pool", 2 * BPF_TEST_BURST,
+				       0, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
+				       SOCKET_ID_ANY);
+	TEST_ASSERT(pool != NULL, "failed to create mempool");
+
+	ret = null_vdev_setup(null_dev, &port, pool);
+	if (ret != 0)
+		goto fail;
+
+	/* Create temp file from embedded BPF object */
+	tmpfile = create_temp_bpf_file(app_test_bpf_filter_o, app_test_bpf_filter_o_len, "rx");
+	if (tmpfile == NULL) {
+		ret = -1;
+		goto fail;
+	}
+
+	/* Do test with VM */
+	ret = bpf_rx_test(port, tmpfile, pool, "drop", 0, 0);
+	if (ret != 0)
+		goto fail;
+
+	/* Repeat with JIT */
+	ret = bpf_rx_test(port, tmpfile, pool, "drop", RTE_BPF_ETH_F_JIT, 0);
+	if (ret != 0)
+		goto fail;
+
+	/* Repeat with allow all */
+	ret = bpf_rx_test(port, tmpfile, pool, "allow", 0, BPF_TEST_BURST);
+	if (ret != 0)
+		goto fail;
+
+	/* Repeat with JIT */
+	ret = bpf_rx_test(port, tmpfile, pool, "allow", RTE_BPF_ETH_F_JIT, BPF_TEST_BURST);
+	if (ret != 0)
+		goto fail;
+
+	printf("%s: RX ELF load test passed\n", __func__);
+
+	/* The filter should free the mbufs */
+	unsigned int avail = rte_mempool_avail_count(pool);
+	TEST_ASSERT_EQUAL(avail, BPF_TEST_POOLSIZE,
+			  "Mempool available %u != %u leaks?", avail, BPF_TEST_POOLSIZE);
+
+fail:
+	if (tmpfile) {
+		unlink(tmpfile);
+		free(tmpfile);
+	}
+
+	if (port != UINT16_MAX)
+		rte_vdev_uninit(null_dev);
+
+	rte_mempool_free(pool);
+
+	return ret == 0 ? TEST_SUCCESS : TEST_FAILED;
+}
+
+
+static int
+test_bpf_elf(void)
+{
+	int ret;
+
+	ret = test_bpf_elf_load();
+	if (ret == TEST_SUCCESS)
+		ret = test_bpf_elf_tx_load();
+	if (ret == TEST_SUCCESS)
+		ret = test_bpf_elf_rx_load();
+
+	return ret;
+}
+
 #else
 
 static int
-test_bpf_elf_load(void)
+test_bpf_elf(void)
 {
 	printf("BPF compile not supported, skipping test\n");
 	return TEST_SKIPPED;
@@ -3435,7 +3751,7 @@ test_bpf_elf_load(void)
 
 #endif /* !TEST_BPF_ELF_LOAD */
 
-REGISTER_FAST_TEST(bpf_elf_load_autotest, true, true, test_bpf_elf_load);
+REGISTER_FAST_TEST(bpf_elf_autotest, true, true, test_bpf_elf);
 
 #ifndef RTE_HAS_LIBPCAP

From 54fe33798a76d764cef628bc3b7c192e8c55ac98 Mon Sep 17 00:00:00 2001
From: Raslan Darawsheh
Date: Tue, 11 Nov 2025 14:16:28 +0200
Subject: [PATCH 97/99] common/mlx5: add BlueField-4 DPU

This adds the BlueField-4 device ID to the list of NVIDIA devices
that run the mlx5 drivers. The device is still in the development
stage.

Signed-off-by: Raslan Darawsheh
---
 drivers/common/mlx5/mlx5_common.h | 1 +
 drivers/crypto/mlx5/mlx5_crypto.c | 4 ++++
 drivers/net/mlx5/mlx5.c           | 4 ++++
 3 files changed, 9 insertions(+)

diff --git a/drivers/common/mlx5/mlx5_common.h b/drivers/common/mlx5/mlx5_common.h
index bd8b5add24c..9403385195d 100644
--- a/drivers/common/mlx5/mlx5_common.h
+++ b/drivers/common/mlx5/mlx5_common.h
@@ -155,6 +155,7 @@ enum {
 	PCI_DEVICE_ID_MELLANOX_BLUEFIELD3 = 0Xa2dc,
 	PCI_DEVICE_ID_MELLANOX_CONNECTX8 = 0x1023,
 	PCI_DEVICE_ID_MELLANOX_CONNECTX9 = 0x1025,
+	PCI_DEVICE_ID_MELLANOX_BLUEFIELD4 = 0xa2df,
 };
 
 /* Maximum number of simultaneous unicast MAC addresses. */
diff --git a/drivers/crypto/mlx5/mlx5_crypto.c b/drivers/crypto/mlx5/mlx5_crypto.c
index 2699253d870..240e8bb858f 100644
--- a/drivers/crypto/mlx5/mlx5_crypto.c
+++ b/drivers/crypto/mlx5/mlx5_crypto.c
@@ -491,6 +491,10 @@ static const struct rte_pci_id mlx5_crypto_pci_id_map[] = {
 		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
 				PCI_DEVICE_ID_MELLANOX_CONNECTX9)
 	},
+	{
+		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
+				PCI_DEVICE_ID_MELLANOX_BLUEFIELD4)
+	},
 	{
 		.vendor_id = 0
 	}
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 1d07ca4293a..447fd31fd4f 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -3878,6 +3878,10 @@ static const struct rte_pci_id mlx5_pci_id_map[] = {
 		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
 				PCI_DEVICE_ID_MELLANOX_CONNECTX9)
 	},
+	{
+		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
+				PCI_DEVICE_ID_MELLANOX_BLUEFIELD4)
+	},
 	{
 		.vendor_id = 0
 	}

From ef98b88455bf4a7c8b7aa3106a761c9e9270d6a3 Mon Sep 17 00:00:00 2001
From: Thomas Monjalon
Date: Wed, 19 Nov 2025 05:38:43 +0100
Subject: [PATCH 98/99] version: 25.11-rc3

Signed-off-by: Thomas Monjalon
---
 VERSION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/VERSION b/VERSION
index b1c9191fb9e..7534e512e6c 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-25.11.0-rc2
+25.11.0-rc3

From 6b2df317533043ba6fc1cd646a7ff66128459542 Mon Sep 17 00:00:00 2001
From: Jiawen Wu
Date: Wed, 19 Nov 2025 16:56:24 +0800
Subject: [PATCH 99/99] net/txgbe: fix the missing old mailbox interface calls

Some SW-FW interactions still call the old mailbox interface function,
which only works for SP devices, so those commands time out on other
devices. Adjust the interaction flow to use a unified function pointer
that each device family sets at init time.
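
The resulting dispatch looks like this (a sketch assembled from the
changes below; each init-ops routine installs the variant matching its
device family):

```
	/* txgbe_init_ops_sp() vs txgbe_init_ops_aml()/_aml40() */
	mbx->host_interface_command = txgbe_host_interface_command_sp;
	/* ... or txgbe_host_interface_command_aml */

	/* call sites no longer branch on hw->mac.type */
	err = hw->mbx.host_interface_command(hw, (u32 *)&buffer,
			sizeof(buffer), TXGBE_HI_COMMAND_TIMEOUT, true);
```
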
Fixes: 6a139ade82e7 ("net/txgbe: add new SW-FW mailbox interface") Signed-off-by: Jiawen Wu Signed-off-by: 0-day Robot --- drivers/net/txgbe/base/txgbe_aml.c | 4 ++++ drivers/net/txgbe/base/txgbe_aml40.c | 4 ++++ drivers/net/txgbe/base/txgbe_eeprom.c | 16 +++++++------- drivers/net/txgbe/base/txgbe_hw.c | 4 ++++ drivers/net/txgbe/base/txgbe_mng.c | 32 ++++++++++++--------------- drivers/net/txgbe/base/txgbe_mng.h | 4 ++++ drivers/net/txgbe/base/txgbe_type.h | 2 ++ 7 files changed, 40 insertions(+), 26 deletions(-) diff --git a/drivers/net/txgbe/base/txgbe_aml.c b/drivers/net/txgbe/base/txgbe_aml.c index 867cf4c2d35..b2c870973e1 100644 --- a/drivers/net/txgbe/base/txgbe_aml.c +++ b/drivers/net/txgbe/base/txgbe_aml.c @@ -17,6 +17,7 @@ void txgbe_init_ops_aml(struct txgbe_hw *hw) { struct txgbe_mac_info *mac = &hw->mac; struct txgbe_phy_info *phy = &hw->phy; + struct txgbe_mbx_info *mbx = &hw->mbx; txgbe_init_ops_generic(hw); @@ -27,6 +28,9 @@ void txgbe_init_ops_aml(struct txgbe_hw *hw) mac->init_mac_link_ops = txgbe_init_mac_link_ops_aml; mac->get_link_capabilities = txgbe_get_link_capabilities_aml; mac->check_link = txgbe_check_mac_link_aml; + + /* MAILBOX */ + mbx->host_interface_command = txgbe_host_interface_command_aml; } s32 txgbe_check_mac_link_aml(struct txgbe_hw *hw, u32 *speed, diff --git a/drivers/net/txgbe/base/txgbe_aml40.c b/drivers/net/txgbe/base/txgbe_aml40.c index c7d64cade65..08e8a7e5d26 100644 --- a/drivers/net/txgbe/base/txgbe_aml40.c +++ b/drivers/net/txgbe/base/txgbe_aml40.c @@ -18,6 +18,7 @@ void txgbe_init_ops_aml40(struct txgbe_hw *hw) { struct txgbe_mac_info *mac = &hw->mac; struct txgbe_phy_info *phy = &hw->phy; + struct txgbe_mbx_info *mbx = &hw->mbx; txgbe_init_ops_generic(hw); @@ -28,6 +29,9 @@ void txgbe_init_ops_aml40(struct txgbe_hw *hw) mac->init_mac_link_ops = txgbe_init_mac_link_ops_aml40; mac->get_link_capabilities = txgbe_get_link_capabilities_aml40; mac->check_link = txgbe_check_mac_link_aml40; + + /* MAILBOX */ + mbx->host_interface_command = txgbe_host_interface_command_aml; } s32 txgbe_check_mac_link_aml40(struct txgbe_hw *hw, u32 *speed, diff --git a/drivers/net/txgbe/base/txgbe_eeprom.c b/drivers/net/txgbe/base/txgbe_eeprom.c index eb53b35a196..b1d90f6f6b4 100644 --- a/drivers/net/txgbe/base/txgbe_eeprom.c +++ b/drivers/net/txgbe/base/txgbe_eeprom.c @@ -137,7 +137,7 @@ void txgbe_release_eeprom_semaphore(struct txgbe_hw *hw) s32 txgbe_ee_read16(struct txgbe_hw *hw, u32 offset, u16 *data) { - const u32 mask = TXGBE_MNGSEM_SWMBX | TXGBE_MNGSEM_SWFLASH; + const u32 mask = TXGBE_MNGSEM_SWFLASH; u32 addr = (offset << 1); int err; @@ -164,7 +164,7 @@ s32 txgbe_ee_read16(struct txgbe_hw *hw, u32 offset, s32 txgbe_ee_readw_buffer(struct txgbe_hw *hw, u32 offset, u32 words, void *data) { - const u32 mask = TXGBE_MNGSEM_SWMBX | TXGBE_MNGSEM_SWFLASH; + const u32 mask = TXGBE_MNGSEM_SWFLASH; u32 addr = (offset << 1); u32 len = (words << 1); u8 *buf = (u8 *)data; @@ -195,7 +195,7 @@ s32 txgbe_ee_readw_buffer(struct txgbe_hw *hw, s32 txgbe_ee_readw_sw(struct txgbe_hw *hw, u32 offset, u16 *data) { - const u32 mask = TXGBE_MNGSEM_SWMBX | TXGBE_MNGSEM_SWFLASH; + const u32 mask = TXGBE_MNGSEM_SWFLASH; u32 addr = hw->rom.sw_addr + (offset << 1); int err; @@ -220,7 +220,7 @@ s32 txgbe_ee_readw_sw(struct txgbe_hw *hw, u32 offset, **/ s32 txgbe_ee_read32(struct txgbe_hw *hw, u32 addr, u32 *data) { - const u32 mask = TXGBE_MNGSEM_SWMBX | TXGBE_MNGSEM_SWFLASH; + const u32 mask = TXGBE_MNGSEM_SWFLASH; int err; err = hw->mac.acquire_swfw_sync(hw, mask); @@ -245,7 +245,7 @@ 
s32 txgbe_ee_read32(struct txgbe_hw *hw, u32 addr, u32 *data) s32 txgbe_ee_write16(struct txgbe_hw *hw, u32 offset, u16 data) { - const u32 mask = TXGBE_MNGSEM_SWMBX | TXGBE_MNGSEM_SWFLASH; + const u32 mask = TXGBE_MNGSEM_SWFLASH; u32 addr = (offset << 1); int err; @@ -272,7 +272,7 @@ s32 txgbe_ee_write16(struct txgbe_hw *hw, u32 offset, s32 txgbe_ee_writew_buffer(struct txgbe_hw *hw, u32 offset, u32 words, void *data) { - const u32 mask = TXGBE_MNGSEM_SWMBX | TXGBE_MNGSEM_SWFLASH; + const u32 mask = TXGBE_MNGSEM_SWFLASH; u32 addr = (offset << 1); u32 len = (words << 1); u8 *buf = (u8 *)data; @@ -301,7 +301,7 @@ s32 txgbe_ee_writew_buffer(struct txgbe_hw *hw, s32 txgbe_ee_writew_sw(struct txgbe_hw *hw, u32 offset, u16 data) { - const u32 mask = TXGBE_MNGSEM_SWMBX | TXGBE_MNGSEM_SWFLASH; + const u32 mask = TXGBE_MNGSEM_SWFLASH; u32 addr = hw->rom.sw_addr + (offset << 1); int err; @@ -326,7 +326,7 @@ s32 txgbe_ee_writew_sw(struct txgbe_hw *hw, u32 offset, **/ s32 txgbe_ee_write32(struct txgbe_hw *hw, u32 addr, u32 data) { - const u32 mask = TXGBE_MNGSEM_SWMBX | TXGBE_MNGSEM_SWFLASH; + const u32 mask = TXGBE_MNGSEM_SWFLASH; int err; err = hw->mac.acquire_swfw_sync(hw, mask); diff --git a/drivers/net/txgbe/base/txgbe_hw.c b/drivers/net/txgbe/base/txgbe_hw.c index 50178868969..c911234822f 100644 --- a/drivers/net/txgbe/base/txgbe_hw.c +++ b/drivers/net/txgbe/base/txgbe_hw.c @@ -2998,6 +2998,7 @@ void txgbe_init_ops_sp(struct txgbe_hw *hw) { struct txgbe_mac_info *mac = &hw->mac; struct txgbe_phy_info *phy = &hw->phy; + struct txgbe_mbx_info *mbx = &hw->mbx; txgbe_init_ops_generic(hw); @@ -3008,6 +3009,9 @@ void txgbe_init_ops_sp(struct txgbe_hw *hw) mac->init_mac_link_ops = txgbe_init_mac_link_ops_sp; mac->get_link_capabilities = txgbe_get_link_capabilities_sp; mac->check_link = txgbe_check_mac_link_sp; + + /* MAILBOX */ + mbx->host_interface_command = txgbe_host_interface_command_sp; } /** diff --git a/drivers/net/txgbe/base/txgbe_mng.c b/drivers/net/txgbe/base/txgbe_mng.c index 05eb07c0e28..a1974820b60 100644 --- a/drivers/net/txgbe/base/txgbe_mng.c +++ b/drivers/net/txgbe/base/txgbe_mng.c @@ -79,7 +79,7 @@ txgbe_hic_unlocked(struct txgbe_hw *hw, u32 *buffer, u32 length, u32 timeout) } /** - * txgbe_host_interface_command - Issue command to manageability block + * txgbe_host_interface_command_sp - Issue command to manageability block * @hw: pointer to the HW structure * @buffer: contains the command to write and where the return status will * be placed @@ -96,9 +96,9 @@ txgbe_hic_unlocked(struct txgbe_hw *hw, u32 *buffer, u32 length, u32 timeout) * else returns semaphore error when encountering an error acquiring * semaphore or TXGBE_ERR_HOST_INTERFACE_COMMAND when command fails. 
**/ -static s32 -txgbe_host_interface_command(struct txgbe_hw *hw, u32 *buffer, - u32 length, u32 timeout, bool return_data) +s32 +txgbe_host_interface_command_sp(struct txgbe_hw *hw, u32 *buffer, + u32 length, u32 timeout, bool return_data) { u32 hdr_size = sizeof(struct txgbe_hic_hdr); struct txgbe_hic_hdr *resp = (struct txgbe_hic_hdr *)buffer; @@ -160,7 +160,7 @@ txgbe_host_interface_command(struct txgbe_hw *hw, u32 *buffer, return err; } -static s32 +s32 txgbe_host_interface_command_aml(struct txgbe_hw *hw, u32 *buffer, u32 length, u32 timeout, bool return_data) { @@ -303,12 +303,8 @@ s32 txgbe_hic_sr_read(struct txgbe_hw *hw, u32 addr, u8 *buf, int len) command.address = cpu_to_be32(addr); command.length = cpu_to_be16(len); - if (hw->mac.type == txgbe_mac_aml || hw->mac.type == txgbe_mac_aml40) - err = txgbe_host_interface_command_aml(hw, (u32 *)&command, - sizeof(command), TXGBE_HI_COMMAND_TIMEOUT, false); - else - err = txgbe_hic_unlocked(hw, (u32 *)&command, - sizeof(command), TXGBE_HI_COMMAND_TIMEOUT); + err = hw->mbx.host_interface_command(hw, (u32 *)&command, + sizeof(command), TXGBE_HI_COMMAND_TIMEOUT, false); if (err) return err; @@ -381,7 +377,7 @@ s32 txgbe_close_notify(struct txgbe_hw *hw) buffer.length = 0; buffer.address = 0; - status = txgbe_host_interface_command(hw, (u32 *)&buffer, + status = hw->mbx.host_interface_command(hw, (u32 *)&buffer, sizeof(buffer), TXGBE_HI_COMMAND_TIMEOUT, false); if (status) @@ -411,7 +407,7 @@ s32 txgbe_open_notify(struct txgbe_hw *hw) buffer.length = 0; buffer.address = 0; - status = txgbe_host_interface_command(hw, (u32 *)&buffer, + status = hw->mbx.host_interface_command(hw, (u32 *)&buffer, sizeof(buffer), TXGBE_HI_COMMAND_TIMEOUT, false); if (status) @@ -466,7 +462,7 @@ s32 txgbe_hic_set_drv_ver(struct txgbe_hw *hw, u8 maj, u8 min, (FW_CEM_HDR_LEN + fw_cmd.hdr.buf_len)); for (i = 0; i <= FW_CEM_MAX_RETRIES; i++) { - ret_val = txgbe_host_interface_command(hw, (u32 *)&fw_cmd, + ret_val = hw->mbx.host_interface_command(hw, (u32 *)&fw_cmd, sizeof(fw_cmd), TXGBE_HI_COMMAND_TIMEOUT, true); @@ -511,7 +507,7 @@ txgbe_hic_reset(struct txgbe_hw *hw) (FW_CEM_HDR_LEN + reset_cmd.hdr.buf_len)); for (i = 0; i <= FW_CEM_MAX_RETRIES; i++) { - err = txgbe_host_interface_command(hw, (u32 *)&reset_cmd, + err = hw->mbx.host_interface_command(hw, (u32 *)&reset_cmd, sizeof(reset_cmd), TXGBE_HI_COMMAND_TIMEOUT, true); @@ -568,7 +564,7 @@ s32 txgbe_hic_get_lldp(struct txgbe_hw *hw) buffer.hdr.checksum = FW_DEFAULT_CHECKSUM; buffer.func = hw->bus.lan_id; - err = txgbe_host_interface_command(hw, (u32 *)&buffer, sizeof(buffer), + err = hw->mbx.host_interface_command(hw, (u32 *)&buffer, sizeof(buffer), TXGBE_HI_COMMAND_TIMEOUT, true); if (err) return err; @@ -599,7 +595,7 @@ s32 txgbe_hic_set_lldp(struct txgbe_hw *hw, bool on) buffer.hdr.checksum = FW_DEFAULT_CHECKSUM; buffer.func = hw->bus.lan_id; - return txgbe_host_interface_command(hw, (u32 *)&buffer, sizeof(buffer), + return hw->mbx.host_interface_command(hw, (u32 *)&buffer, sizeof(buffer), TXGBE_HI_COMMAND_TIMEOUT, false); } @@ -619,7 +615,7 @@ s32 txgbe_hic_ephy_set_link(struct txgbe_hw *hw, u8 speed, u8 autoneg, u8 duplex buffer.duplex = duplex; for (i = 0; i <= FW_CEM_MAX_RETRIES; i++) { - status = txgbe_host_interface_command(hw, (u32 *)&buffer, + status = hw->mbx.host_interface_command(hw, (u32 *)&buffer, sizeof(buffer), TXGBE_HI_COMMAND_TIMEOUT_SHORT, true); if (status != 0) { diff --git a/drivers/net/txgbe/base/txgbe_mng.h b/drivers/net/txgbe/base/txgbe_mng.h index 53c5cd5487c..da5efecefde 100644 
--- a/drivers/net/txgbe/base/txgbe_mng.h +++ b/drivers/net/txgbe/base/txgbe_mng.h @@ -208,6 +208,10 @@ struct txgbe_hic_write_lldp { u16 pad3; }; +s32 txgbe_host_interface_command_sp(struct txgbe_hw *hw, u32 *buffer, + u32 length, u32 timeout, bool return_data); +s32 txgbe_host_interface_command_aml(struct txgbe_hw *hw, u32 *buffer, + u32 length, u32 timeout, bool return_data); s32 txgbe_hic_sr_read(struct txgbe_hw *hw, u32 addr, u8 *buf, int len); s32 txgbe_hic_sr_write(struct txgbe_hw *hw, u32 addr, u8 *buf, int len); s32 txgbe_close_notify(struct txgbe_hw *hw); diff --git a/drivers/net/txgbe/base/txgbe_type.h b/drivers/net/txgbe/base/txgbe_type.h index b5dbc9b755a..ede780321f4 100644 --- a/drivers/net/txgbe/base/txgbe_type.h +++ b/drivers/net/txgbe/base/txgbe_type.h @@ -786,6 +786,8 @@ struct txgbe_mbx_info { s32 (*check_for_msg)(struct txgbe_hw *hw, u16 mbx_id); s32 (*check_for_ack)(struct txgbe_hw *hw, u16 mbx_id); s32 (*check_for_rst)(struct txgbe_hw *hw, u16 mbx_id); + s32 (*host_interface_command)(struct txgbe_hw *hw, u32 *buffer, + u32 length, u32 timeout, bool return_data); struct txgbe_mbx_stats stats; u32 timeout;