Skip to content

Commit

Permalink
net/mlx5: fix counters map in bonding mode
Browse files Browse the repository at this point in the history
[ upstream commit a687c3e658c2d889052089af8340bc0b9299c856 ]

In the HW-LAG mode, there is only one mlx5 IB device with 2 ETH
interfaces. In theory, the settings on both ports should be the same.
But in the real life, some inconsistent settings may be done by the
user and the PMD is not aware of this.

In the previous implementation, the xstats map was generated from the
information fetched on the 1st port of a bonding interface. If the
2nd port had a different settings, the number and the order of the
counters may differ from that of the 1st one. The ioctl() call may
corrupt the user buffers (copy_to_user) and cause a crash.

The commit will change the map between the driver counters to the
PMD user counters.
  1. Switch the inner and outer loop to speed up the initialization
     time AMAP - since there will be >300 counters returned from the
     driver.
  2. Generate an unique map for both ports in LAG mode.
    a. Scan the 1st port and find the supported counters' strings,
       then add to the map.
    b. In bonding, scan the 2nd port and find the strings. If one is
       already in the map, use the index. Or append to the next free
       slot.
    c. Append the device counters that needs to be fetched via sysfs
       or Devx command. This kind of counter(s) is unique per IB
       device.

After querying the statistics from the driver, the value will be read
from the proper offset in the "struct ethtool_stats" and then added
into the output array based on the map information. In bonding mode,
the statistics from both ports will be accumulated if the counters
are valid on both ports.

Compared to the system call or Devx command, the overhead introduced
by the extra index comparison is light and should not cause a
significant degradation.

The application should ensure that the port settings should not be
changed out of the DPDK application dynamically in most cases. Or
else the change cannot be notified and the counters map might not
be valid when the number doesn't change but the counters set had
changed. A device restart will help to re-initialize the map from
scrath.

Fixes: 7ed15ac ("net/mlx5: improve xstats of bonding port")

Signed-off-by: Bing Zhao <bingz@nvidia.com>
Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
  • Loading branch information
zorrohahaha authored and kevintraynor committed Mar 8, 2024
1 parent 03c7f0a commit 2481136
Show file tree
Hide file tree
Showing 4 changed files with 242 additions and 102 deletions.
249 changes: 177 additions & 72 deletions drivers/net/mlx5/linux/mlx5_ethdev_os.c
Original file line number Diff line number Diff line change
Expand Up @@ -1356,13 +1356,16 @@ _mlx5_os_read_dev_counters(struct rte_eth_dev *dev, int pf, uint64_t *stats)
struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl;
unsigned int i;
struct ifreq ifr;
unsigned int stats_sz = xstats_ctrl->stats_n * sizeof(uint64_t);
unsigned int max_stats_n = RTE_MAX(xstats_ctrl->stats_n, xstats_ctrl->stats_n_2nd);
unsigned int stats_sz = max_stats_n * sizeof(uint64_t);
unsigned char et_stat_buf[sizeof(struct ethtool_stats) + stats_sz];
struct ethtool_stats *et_stats = (struct ethtool_stats *)et_stat_buf;
int ret;
uint16_t i_idx, o_idx;

et_stats->cmd = ETHTOOL_GSTATS;
et_stats->n_stats = xstats_ctrl->stats_n;
/* Pass the maximum value, the driver may ignore this. */
et_stats->n_stats = max_stats_n;
ifr.ifr_data = (caddr_t)et_stats;
if (pf >= 0)
ret = mlx5_ifreq_by_ifname(priv->sh->bond.ports[pf].ifname,
Expand All @@ -1375,37 +1378,50 @@ _mlx5_os_read_dev_counters(struct rte_eth_dev *dev, int pf, uint64_t *stats)
dev->data->port_id);
return ret;
}
for (i = 0; i != xstats_ctrl->mlx5_stats_n; ++i) {
if (xstats_ctrl->info[i].dev)
continue;
stats[i] += (uint64_t)
et_stats->data[xstats_ctrl->dev_table_idx[i]];
if (pf <= 0) {
for (i = 0; i != xstats_ctrl->mlx5_stats_n; i++) {
i_idx = xstats_ctrl->dev_table_idx[i];
if (i_idx == UINT16_MAX || xstats_ctrl->info[i].dev)
continue;
o_idx = xstats_ctrl->xstats_o_idx[i];
stats[o_idx] += (uint64_t)et_stats->data[i_idx];
}
} else {
for (i = 0; i != xstats_ctrl->mlx5_stats_n; i++) {
i_idx = xstats_ctrl->dev_table_idx_2nd[i];
if (i_idx == UINT16_MAX)
continue;
o_idx = xstats_ctrl->xstats_o_idx_2nd[i];
stats[o_idx] += (uint64_t)et_stats->data[i_idx];
}
}
return 0;
}

/**
/*
* Read device counters.
*
* @param dev
* Pointer to Ethernet device.
* @param[out] stats
* @param bond_master
* Indicate if the device is a bond master.
* @param stats
* Counters table output buffer.
*
* @return
* 0 on success and stats is filled, negative errno value otherwise and
* rte_errno is set.
*/
int
mlx5_os_read_dev_counters(struct rte_eth_dev *dev, uint64_t *stats)
mlx5_os_read_dev_counters(struct rte_eth_dev *dev, bool bond_master, uint64_t *stats)
{
struct mlx5_priv *priv = dev->data->dev_private;
struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl;
int ret = 0, i;

memset(stats, 0, sizeof(*stats) * xstats_ctrl->mlx5_stats_n);
/* Read ifreq counters. */
if (priv->master && priv->pf_bond >= 0) {
if (bond_master) {
/* Sum xstats from bonding device member ports. */
for (i = 0; i < priv->sh->bond.n_port; i++) {
ret = _mlx5_os_read_dev_counters(dev, i, stats);
Expand All @@ -1417,32 +1433,42 @@ mlx5_os_read_dev_counters(struct rte_eth_dev *dev, uint64_t *stats)
if (ret)
return ret;
}
/* Read IB counters. */
for (i = 0; i != xstats_ctrl->mlx5_stats_n; ++i) {
/*
* Read IB counters.
* The counters are unique per IB device but not per net IF.
* In bonding mode, getting the stats name only from 1 port is enough.
*/
for (i = 0; i != xstats_ctrl->mlx5_stats_n; i++) {
if (!xstats_ctrl->info[i].dev)
continue;
/* return last xstats counter if fail to read. */
if (mlx5_os_read_dev_stat(priv, xstats_ctrl->info[i].ctr_name,
&stats[i]) == 0)
&stats[i]) == 0)
xstats_ctrl->xstats[i] = stats[i];
else
stats[i] = xstats_ctrl->xstats[i];
}
return ret;
}

/**
/*
* Query the number of statistics provided by ETHTOOL.
*
* @param dev
* Pointer to Ethernet device.
* @param bond_master
* Indicate if the device is a bond master.
* @param n_stats
* Pointer to number of stats to store.
* @param n_stats_sec
* Pointer to number of stats to store for the 2nd port of the bond.
*
* @return
* Number of statistics on success, negative errno value otherwise and
* rte_errno is set.
* 0 on success, negative errno value otherwise and rte_errno is set.
*/
int
mlx5_os_get_stats_n(struct rte_eth_dev *dev)
mlx5_os_get_stats_n(struct rte_eth_dev *dev, bool bond_master,
uint16_t *n_stats, uint16_t *n_stats_sec)
{
struct mlx5_priv *priv = dev->data->dev_private;
struct ethtool_drvinfo drvinfo;
Expand All @@ -1451,18 +1477,34 @@ mlx5_os_get_stats_n(struct rte_eth_dev *dev)

drvinfo.cmd = ETHTOOL_GDRVINFO;
ifr.ifr_data = (caddr_t)&drvinfo;
if (priv->master && priv->pf_bond >= 0)
/* Bonding PF. */
/* Bonding PFs. */
if (bond_master) {
ret = mlx5_ifreq_by_ifname(priv->sh->bond.ports[0].ifname,
SIOCETHTOOL, &ifr);
else
if (ret) {
DRV_LOG(WARNING, "bonding port %u unable to query number of"
" statistics for the 1st slave, %d", PORT_ID(priv), ret);
return ret;
}
*n_stats = drvinfo.n_stats;
ret = mlx5_ifreq_by_ifname(priv->sh->bond.ports[1].ifname,
SIOCETHTOOL, &ifr);
if (ret) {
DRV_LOG(WARNING, "bonding port %u unable to query number of"
" statistics for the 2nd slave, %d", PORT_ID(priv), ret);
return ret;
}
*n_stats_sec = drvinfo.n_stats;
} else {
ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
if (ret) {
DRV_LOG(WARNING, "port %u unable to query number of statistics",
dev->data->port_id);
return ret;
if (ret) {
DRV_LOG(WARNING, "port %u unable to query number of statistics",
PORT_ID(priv));
return ret;
}
*n_stats = drvinfo.n_stats;
}
return drvinfo.n_stats;
return 0;
}

static const struct mlx5_counter_ctrl mlx5_counters_init[] = {
Expand Down Expand Up @@ -1584,6 +1626,101 @@ static const struct mlx5_counter_ctrl mlx5_counters_init[] = {

static const unsigned int xstats_n = RTE_DIM(mlx5_counters_init);

static int
mlx5_os_get_stats_strings(struct rte_eth_dev *dev, bool bond_master,
struct ethtool_gstrings *strings,
uint32_t stats_n, uint32_t stats_n_2nd)
{
struct mlx5_priv *priv = dev->data->dev_private;
struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl;
struct ifreq ifr;
int ret;
uint32_t i, j, idx;

/* Ensure no out of bounds access before. */
MLX5_ASSERT(xstats_n <= MLX5_MAX_XSTATS);
strings->cmd = ETHTOOL_GSTRINGS;
strings->string_set = ETH_SS_STATS;
strings->len = stats_n;
ifr.ifr_data = (caddr_t)strings;
if (bond_master)
ret = mlx5_ifreq_by_ifname(priv->sh->bond.ports[0].ifname,
SIOCETHTOOL, &ifr);
else
ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
if (ret) {
DRV_LOG(WARNING, "port %u unable to get statistic names with %d",
PORT_ID(priv), ret);
return ret;
}
/* Reorganize the orders to reduce the iterations. */
for (j = 0; j < xstats_n; j++) {
xstats_ctrl->dev_table_idx[j] = UINT16_MAX;
for (i = 0; i < stats_n; i++) {
const char *curr_string =
(const char *)&strings->data[i * ETH_GSTRING_LEN];

if (!strcmp(mlx5_counters_init[j].ctr_name, curr_string)) {
idx = xstats_ctrl->mlx5_stats_n++;
xstats_ctrl->dev_table_idx[j] = i;
xstats_ctrl->xstats_o_idx[j] = idx;
xstats_ctrl->info[idx] = mlx5_counters_init[j];
}
}
}
if (!bond_master) {
/* Add dev counters, unique per IB device. */
for (j = 0; j != xstats_n; j++) {
if (mlx5_counters_init[j].dev) {
idx = xstats_ctrl->mlx5_stats_n++;
xstats_ctrl->info[idx] = mlx5_counters_init[j];
xstats_ctrl->hw_stats[idx] = 0;
}
}
return 0;
}

strings->len = stats_n_2nd;
ret = mlx5_ifreq_by_ifname(priv->sh->bond.ports[1].ifname,
SIOCETHTOOL, &ifr);
if (ret) {
DRV_LOG(WARNING, "port %u unable to get statistic names for 2nd slave with %d",
PORT_ID(priv), ret);
return ret;
}
/* The 2nd slave port may have a different strings set, based on the configuration. */
for (j = 0; j != xstats_n; j++) {
xstats_ctrl->dev_table_idx_2nd[j] = UINT16_MAX;
for (i = 0; i != stats_n_2nd; i++) {
const char *curr_string =
(const char *)&strings->data[i * ETH_GSTRING_LEN];

if (!strcmp(mlx5_counters_init[j].ctr_name, curr_string)) {
xstats_ctrl->dev_table_idx_2nd[j] = i;
if (xstats_ctrl->dev_table_idx[j] != UINT16_MAX) {
/* Already mapped in the 1st slave port. */
idx = xstats_ctrl->xstats_o_idx[j];
xstats_ctrl->xstats_o_idx_2nd[j] = idx;
} else {
/* Append the new items to the end of the map. */
idx = xstats_ctrl->mlx5_stats_n++;
xstats_ctrl->xstats_o_idx_2nd[j] = idx;
xstats_ctrl->info[idx] = mlx5_counters_init[j];
}
}
}
}
/* Dev counters are always at the last now. */
for (j = 0; j != xstats_n; j++) {
if (mlx5_counters_init[j].dev) {
idx = xstats_ctrl->mlx5_stats_n++;
xstats_ctrl->info[idx] = mlx5_counters_init[j];
xstats_ctrl->hw_stats[idx] = 0;
}
}
return 0;
}

/**
* Init the structures to read device counters.
*
Expand All @@ -1596,76 +1733,44 @@ mlx5_os_stats_init(struct rte_eth_dev *dev)
struct mlx5_priv *priv = dev->data->dev_private;
struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl;
struct mlx5_stats_ctrl *stats_ctrl = &priv->stats_ctrl;
unsigned int i;
unsigned int j;
struct ifreq ifr;
struct ethtool_gstrings *strings = NULL;
unsigned int dev_stats_n;
uint16_t dev_stats_n = 0;
uint16_t dev_stats_n_2nd = 0;
unsigned int max_stats_n;
unsigned int str_sz;
int ret;
bool bond_master = (priv->master && priv->pf_bond >= 0);

/* So that it won't aggregate for each init. */
xstats_ctrl->mlx5_stats_n = 0;
ret = mlx5_os_get_stats_n(dev);
ret = mlx5_os_get_stats_n(dev, bond_master, &dev_stats_n, &dev_stats_n_2nd);
if (ret < 0) {
DRV_LOG(WARNING, "port %u no extended statistics available",
dev->data->port_id);
return;
}
dev_stats_n = ret;
max_stats_n = RTE_MAX(dev_stats_n, dev_stats_n_2nd);
/* Allocate memory to grab stat names and values. */
str_sz = dev_stats_n * ETH_GSTRING_LEN;
str_sz = max_stats_n * ETH_GSTRING_LEN;
strings = (struct ethtool_gstrings *)
mlx5_malloc(0, str_sz + sizeof(struct ethtool_gstrings), 0,
SOCKET_ID_ANY);
if (!strings) {
DRV_LOG(WARNING, "port %u unable to allocate memory for xstats",
dev->data->port_id);
dev->data->port_id);
return;
}
strings->cmd = ETHTOOL_GSTRINGS;
strings->string_set = ETH_SS_STATS;
strings->len = dev_stats_n;
ifr.ifr_data = (caddr_t)strings;
if (priv->master && priv->pf_bond >= 0)
/* Bonding master. */
ret = mlx5_ifreq_by_ifname(priv->sh->bond.ports[0].ifname,
SIOCETHTOOL, &ifr);
else
ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
if (ret) {
DRV_LOG(WARNING, "port %u unable to get statistic names",
ret = mlx5_os_get_stats_strings(dev, bond_master, strings,
dev_stats_n, dev_stats_n_2nd);
if (ret < 0) {
DRV_LOG(WARNING, "port %u failed to get the stats strings",
dev->data->port_id);
goto free;
}
for (i = 0; i != dev_stats_n; ++i) {
const char *curr_string = (const char *)
&strings->data[i * ETH_GSTRING_LEN];

for (j = 0; j != xstats_n; ++j) {
if (!strcmp(mlx5_counters_init[j].ctr_name,
curr_string)) {
unsigned int idx = xstats_ctrl->mlx5_stats_n++;

xstats_ctrl->dev_table_idx[idx] = i;
xstats_ctrl->info[idx] = mlx5_counters_init[j];
break;
}
}
}
/* Add dev counters. */
for (i = 0; i != xstats_n; ++i) {
if (mlx5_counters_init[i].dev) {
unsigned int idx = xstats_ctrl->mlx5_stats_n++;

xstats_ctrl->info[idx] = mlx5_counters_init[i];
xstats_ctrl->hw_stats[idx] = 0;
}
}
MLX5_ASSERT(xstats_ctrl->mlx5_stats_n <= MLX5_MAX_XSTATS);
xstats_ctrl->stats_n = dev_stats_n;
xstats_ctrl->stats_n_2nd = dev_stats_n_2nd;
/* Copy to base at first time. */
ret = mlx5_os_read_dev_counters(dev, xstats_ctrl->base);
ret = mlx5_os_read_dev_counters(dev, bond_master, xstats_ctrl->base);
if (ret)
DRV_LOG(ERR, "port %u cannot read device counters: %s",
dev->data->port_id, strerror(rte_errno));
Expand Down

0 comments on commit 2481136

Please sign in to comment.