Skip to content

Commit

Permalink
npu2-opencapi: Detect if link trained in degraded mode
Browse files Browse the repository at this point in the history
There's no status readily available to tell the effective link
width. Instead, we have to look at the individual status of each lane,
in both the transmit and receive directions. All relevant information is in
the ODL status register.

Signed-off-by: Frederic Barrat <fbarrat@linux.ibm.com>
Reviewed-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
Signed-off-by: Stewart Smith <stewart@linux.ibm.com>
  • Loading branch information
fbarrat authored and stewartsmith committed Nov 28, 2018
1 parent ff37680 commit 64d06b1
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 19 deletions.
50 changes: 31 additions & 19 deletions hw/npu2-opencapi.c
Original file line number Diff line number Diff line change
Expand Up @@ -1013,32 +1013,43 @@ static int64_t npu2_opencapi_get_presence_state(struct pci_slot __unused *slot,
return OPAL_SUCCESS;
}

/*
 * Work out the effective link width from an ODL status register value.
 *
 * The 'trained mode' field of that register is hard-coded to x8 on P9,
 * so it cannot be used here. The per-lane trained bits, for both the
 * receive and transmit directions, are inspected instead. A link either
 * trains at x8, at x4, or does not train at all.
 *
 * Returns OPAL_SHPC_LINK_DOWN when the training state machine is not in
 * the trained state, OPAL_SHPC_LINK_UP_x8 when every RX and TX lane bit
 * is set, and OPAL_SHPC_LINK_UP_x4 otherwise.
 */
static enum OpalShpcLinkState get_link_width(uint64_t odl_status)
{
	uint64_t training_state, rx_trained, tx_trained;

	training_state = GETFIELD(OB_ODL_STATUS_TRAINING_STATE_MACHINE,
				  odl_status);
	if (training_state != OCAPI_LINK_STATE_TRAINED)
		return OPAL_SHPC_LINK_DOWN;

	rx_trained = GETFIELD(OB_ODL_STATUS_RX_TRAINED_LANES, odl_status);
	tx_trained = GETFIELD(OB_ODL_STATUS_TX_TRAINED_LANES, odl_status);

	/* Every lane bit set in both directions: full-width link */
	if (rx_trained == 0xFF && tx_trained == 0xFF)
		return OPAL_SHPC_LINK_UP_x8;

	/* Trained, but at least one lane bit is clear: report x4 */
	return OPAL_SHPC_LINK_UP_x4;
}

/*
 * Report the OpenCAPI link state of a slot through *val.
 *
 * As written here the function always returns OPAL_SUCCESS and stores
 * either OPAL_SHPC_LINK_DOWN or the value computed by get_link_width()
 * into *val.
 *
 * NOTE(review): this span looks like a rendered diff whose +/- markers
 * were stripped, so lines from before and after the change appear to be
 * interleaved. The link_width/training_status reads and the explicit
 * "not trained" check are presumably the removed pre-change body, and
 * the get_link_width() call the added one -- confirm against the
 * repository before relying on this text.
 */
static int64_t npu2_opencapi_get_link_state(struct pci_slot *slot, uint8_t *val)
{
struct npu2_dev *dev = phb_to_npu2_dev_ocapi(slot->phb);
uint64_t reg;
/* Up to the closing brace below, link_width and rc are set but never read. */
int64_t link_width, training_status, rc = OPAL_SUCCESS;

/* Read the ODL status for this device (chip id + brick index). */
reg = get_odl_status(dev->npu->chip_id, dev->brick_index);
link_width = GETFIELD(OB_ODL_STATUS_TRAINED_MODE, reg);
training_status = GETFIELD(OB_ODL_STATUS_TRAINING_STATE_MACHINE, reg);

/* Any state other than TRAINED is reported as link down. */
if (training_status != OCAPI_LINK_STATE_TRAINED) {
*val = OPAL_SHPC_LINK_DOWN;
return OPAL_SUCCESS;
}
/* get_link_width() repeats the trained check, then picks x4 or x8. */
*val = get_link_width(reg);
return OPAL_SUCCESS;
}

switch (link_width) {
case 0b0001:
*val = OPAL_SHPC_LINK_UP_x4;
break;
case 0b0010:
*val = OPAL_SHPC_LINK_UP_x8;
break;
default:
rc = OPAL_HARDWARE;
}
return rc;
static void check_trained_link(struct npu2_dev *dev, uint64_t odl_status)
{
if (get_link_width(odl_status) != OPAL_SHPC_LINK_UP_x8)
OCAPIERR(dev, "Link trained in degraded mode (%016llx)\n",
odl_status);
}

static int64_t npu2_opencapi_retry_state(struct pci_slot *slot,
Expand Down Expand Up @@ -1089,6 +1100,7 @@ static int64_t npu2_opencapi_poll_link(struct pci_slot *slot)
OCAPI_LINK_STATE_TRAINED) {
OCAPIINF(dev, "link trained in %lld ms\n",
OCAPI_LINK_TRAINING_TIMEOUT - slot->retries);
check_trained_link(dev, reg);
pci_slot_set_state(slot, OCAPI_SLOT_LINK_TRAINED);
return pci_slot_set_sm_timeout(slot, msecs_to_tb(1));
}
Expand Down
2 changes: 2 additions & 0 deletions include/npu2-regs.h
Original file line number Diff line number Diff line change
Expand Up @@ -741,6 +741,8 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
/* ODL0/ODL1 status registers for OB3 (presumably SCOM addresses -- confirm). */
#define OB3_ODL0_STATUS 0xC01082C
#define OB3_ODL1_STATUS 0xC01082D
/*
 * Fields of the ODL status register, extracted with GETFIELD().
 * TRAINED_MODE is described in npu2-opencapi.c as hard-coded to x8 on P9,
 * so it does not reflect the effective link width there.
 */
#define OB_ODL_STATUS_TRAINED_MODE PPC_BITMASK(0,3)
/* Trained-lane status, receive direction; 0xFF is treated as all lanes trained. */
#define OB_ODL_STATUS_RX_TRAINED_LANES PPC_BITMASK(16, 23)
/* Trained-lane status, transmit direction; 0xFF is treated as all lanes trained. */
#define OB_ODL_STATUS_TX_TRAINED_LANES PPC_BITMASK(24, 31)
/* Training state machine; compared against OCAPI_LINK_STATE_TRAINED. */
#define OB_ODL_STATUS_TRAINING_STATE_MACHINE PPC_BITMASK(49, 51)

/* ODL0 training status register for OB0 (presumably a SCOM address -- confirm). */
#define OB0_ODL0_TRAINING_STATUS 0x901082E
Expand Down

0 comments on commit 64d06b1

Please sign in to comment.