Skip to content

Commit

Permalink
net/bnxt: support periodic FW health monitoring
Browse files Browse the repository at this point in the history
Periodically poll the FW heartbeat register and the FW recovery counter
register to check the FW health. The polling frequency is advertised
by the FW in the HWRM_ERROR_RECOVERY_QCFG response.
Schedule the health-check task upon receiving the async event from the FW.

Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Reviewed-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
Reviewed-by: Somnath Kotur <somnath.kotur@broadcom.com>
  • Loading branch information
Kalesh AP authored and Ferruh Yigit committed Oct 8, 2019
1 parent 6391aeb commit 9d0cbae
Show file tree
Hide file tree
Showing 3 changed files with 113 additions and 0 deletions.
6 changes: 6 additions & 0 deletions drivers/net/bnxt/bnxt.h
Original file line number Diff line number Diff line change
Expand Up @@ -368,6 +368,9 @@ struct bnxt_error_recovery_info {
#define BNXT_FLAG_MASTER_FUNC (1 << 2)
#define BNXT_FLAG_RECOVERY_ENABLED (1 << 3)
uint32_t flags;

uint32_t last_heart_beat;
uint32_t last_reset_counter;
};

/* address space location of register */
Expand Down Expand Up @@ -416,6 +419,7 @@ struct bnxt {
#define BNXT_FLAG_FW_CAP_IF_CHANGE (1 << 17)
#define BNXT_FLAG_IF_CHANGE_HOT_FW_RESET_DONE (1 << 18)
#define BNXT_FLAG_FW_CAP_ERROR_RECOVERY (1 << 19)
#define BNXT_FLAG_FW_HEALTH_CHECK_SCHEDULED (1 << 20)
#define BNXT_FLAG_EXT_STATS_SUPPORTED (1 << 29)
#define BNXT_FLAG_NEW_RM (1 << 30)
#define BNXT_FLAG_INIT_DONE (1U << 31)
Expand Down Expand Up @@ -532,6 +536,8 @@ int bnxt_rcv_msg_from_vf(struct bnxt *bp, uint16_t vf_id, void *msg);
int is_bnxt_in_error(struct bnxt *bp);

int bnxt_map_fw_health_status_regs(struct bnxt *bp);
uint32_t bnxt_read_fw_status_reg(struct bnxt *bp, uint32_t index);
void bnxt_schedule_fw_health_check(struct bnxt *bp);

bool is_bnxt_supported(struct rte_eth_dev *dev);
bool bnxt_stratus_device(struct bnxt *bp);
Expand Down
10 changes: 10 additions & 0 deletions drivers/net/bnxt/bnxt_cpr.c
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,16 @@ void bnxt_handle_async_event(struct bnxt *bp,
PMD_DRV_LOG(INFO, "recovery enabled(%d), master function(%d)\n",
bnxt_is_recovery_enabled(bp),
bnxt_is_master_func(bp));

if (bp->flags & BNXT_FLAG_FW_HEALTH_CHECK_SCHEDULED)
return;

info->last_heart_beat =
bnxt_read_fw_status_reg(bp, BNXT_FW_HEARTBEAT_CNT_REG);
info->last_reset_counter =
bnxt_read_fw_status_reg(bp, BNXT_FW_RECOVERY_CNT_REG);

bnxt_schedule_fw_health_check(bp);
break;
default:
PMD_DRV_LOG(INFO, "handle_async_event id = 0x%x\n", event_id);
Expand Down
97 changes: 97 additions & 0 deletions drivers/net/bnxt/bnxt_ethdev.c
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,7 @@ static int bnxt_mtu_set_op(struct rte_eth_dev *eth_dev, uint16_t new_mtu);
static int bnxt_dev_uninit(struct rte_eth_dev *eth_dev);
static int bnxt_init_resources(struct bnxt *bp, bool reconfig_dev);
static int bnxt_uninit_resources(struct bnxt *bp, bool reconfig_dev);
static void bnxt_cancel_fw_health_check(struct bnxt *bp);

int is_bnxt_in_error(struct bnxt *bp)
{
Expand Down Expand Up @@ -858,6 +859,7 @@ static int bnxt_dev_start_op(struct rte_eth_dev *eth_dev)
bp->flags |= BNXT_FLAG_INIT_DONE;
eth_dev->data->dev_started = 1;
bp->dev_stopped = 0;
bnxt_schedule_fw_health_check(bp);
return 0;

error:
Expand Down Expand Up @@ -910,6 +912,8 @@ static void bnxt_dev_stop_op(struct rte_eth_dev *eth_dev)
/* disable uio/vfio intr/eventfd mapping */
rte_intr_disable(intr_handle);

bnxt_cancel_fw_health_check(bp);

bp->flags &= ~BNXT_FLAG_INIT_DONE;
if (bp->eth_dev->data->dev_started) {
/* TBD: STOP HW queues DMA */
Expand Down Expand Up @@ -3682,6 +3686,99 @@ void bnxt_dev_reset_and_resume(void *arg)
PMD_DRV_LOG(ERR, "Error setting recovery alarm");
}

/* Read the FW status register selected by 'index'.
 *
 * The descriptor in recovery_info->status_regs[index] is decoded into a
 * register type and an offset:
 *  - CFG:  read sizeof(uint32_t) bytes with rte_pci_read_config() at the
 *          decoded offset.
 *  - GRC:  read 32 bits from bar0 at recovery_info->mapped_status_regs[index].
 *  - BAR0: read 32 bits from bar0 at the decoded offset.
 * bar0 reads are passed through rte_le_to_cpu_32().
 *
 * Returns the value read, or 0 for any other register type.
 *
 * NOTE(review): the return value of rte_pci_read_config() is not checked
 * here, and 'index' is used unchecked to index status_regs[] and
 * mapped_status_regs[] - callers are presumably expected to pass a valid
 * index; confirm.
 */
uint32_t bnxt_read_fw_status_reg(struct bnxt *bp, uint32_t index)
{
	struct bnxt_error_recovery_info *info = bp->recovery_info;
	uint32_t reg = info->status_regs[index];
	uint32_t reg_type = BNXT_FW_STATUS_REG_TYPE(reg);
	uint32_t reg_off = BNXT_FW_STATUS_REG_OFF(reg);
	uint32_t val = 0;

	if (reg_type == BNXT_FW_STATUS_REG_TYPE_CFG) {
		rte_pci_read_config(bp->pdev, &val, sizeof(val), reg_off);
		return val;
	}

	/* Only GRC and BAR0 registers are read through bar0 */
	if (reg_type != BNXT_FW_STATUS_REG_TYPE_GRC &&
	    reg_type != BNXT_FW_STATUS_REG_TYPE_BAR0)
		return val;

	/* GRC registers are reached through their mapped offset */
	if (reg_type == BNXT_FW_STATUS_REG_TYPE_GRC)
		reg_off = info->mapped_status_regs[index];

	val = rte_le_to_cpu_32(rte_read32((uint8_t *)bp->bar0 + reg_off));

	return val;
}

/* Driver should poll FW heartbeat, reset_counter with the frequency
* advertised by FW in HWRM_ERROR_RECOVERY_QCFG.
* When the driver detects heartbeat stop or change in reset_counter,
* it has to trigger a reset to recover from the error condition.
* A “master PF” is the function who will have the privilege to
* initiate the chimp reset. The master PF will be elected by the
* firmware and will be notified through async message.
*/
static void bnxt_check_fw_health(void *arg)
{
struct bnxt *bp = arg;
struct bnxt_error_recovery_info *info = bp->recovery_info;
uint32_t val = 0;

if (!info || !bnxt_is_recovery_enabled(bp) ||
is_bnxt_in_error(bp))
return;

val = bnxt_read_fw_status_reg(bp, BNXT_FW_HEARTBEAT_CNT_REG);
if (val == info->last_heart_beat)
goto reset;

info->last_heart_beat = val;

val = bnxt_read_fw_status_reg(bp, BNXT_FW_RECOVERY_CNT_REG);
if (val != info->last_reset_counter)
goto reset;

info->last_reset_counter = val;

rte_eal_alarm_set(US_PER_MS * info->driver_polling_freq,
bnxt_check_fw_health, (void *)bp);

return;
reset:
/* Stop DMA to/from device */
bp->flags |= BNXT_FLAG_FATAL_ERROR;
bp->flags |= BNXT_FLAG_FW_RESET;

PMD_DRV_LOG(ERR, "Detected FW dead condition\n");
}

/* Arm the FW health check alarm for this port.
 *
 * Does nothing when recovery is not enabled or when
 * BNXT_FLAG_FW_HEALTH_CHECK_SCHEDULED is already set. Otherwise
 * bnxt_check_fw_health() is registered as an alarm callback with a delay
 * of US_PER_MS * recovery_info->driver_polling_freq (presumably the
 * polling interval in milliseconds - confirm against the HWRM response).
 *
 * The scheduled flag is set only when rte_eal_alarm_set() succeeds, so
 * that a failed attempt does not block later calls from arming the alarm.
 */
void bnxt_schedule_fw_health_check(struct bnxt *bp)
{
	uint32_t polling_freq;

	if (!bnxt_is_recovery_enabled(bp))
		return;

	if (bp->flags & BNXT_FLAG_FW_HEALTH_CHECK_SCHEDULED)
		return;

	polling_freq = bp->recovery_info->driver_polling_freq;

	/* Leave the flag clear on failure: no alarm is pending, and a
	 * subsequent call must be allowed to retry.
	 */
	if (rte_eal_alarm_set(US_PER_MS * polling_freq,
			      bnxt_check_fw_health, (void *)bp) != 0)
		return;

	bp->flags |= BNXT_FLAG_FW_HEALTH_CHECK_SCHEDULED;
}

/* Cancel the FW health check alarm for this port and clear
 * BNXT_FLAG_FW_HEALTH_CHECK_SCHEDULED. Nothing is done when recovery
 * is not enabled.
 */
static void bnxt_cancel_fw_health_check(struct bnxt *bp)
{
	if (bnxt_is_recovery_enabled(bp)) {
		rte_eal_alarm_cancel(bnxt_check_fw_health, (void *)bp);
		bp->flags &= ~BNXT_FLAG_FW_HEALTH_CHECK_SCHEDULED;
	}
}

static bool bnxt_vf_pciid(uint16_t id)
{
if (id == BROADCOM_DEV_ID_57304_VF ||
Expand Down

0 comments on commit 9d0cbae

Please sign in to comment.