From 3f936bae970e8ce116151b7106bf7d6437046c19 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Tue, 12 Sep 2017 14:56:19 +1000 Subject: [PATCH] phb4: Retrain link if degraded On P9 Scale Out (Nimbus) DD2.0 and Scale in (Cumulus) DD1.0 (and below) the PCIe PHY can lockup causing training issues. This can cause a degradation in speed or width in ~5% of training cases (depending on the card). This is fixed in later chip revisions. This issue can also cause PCIe links to not train at all, but this case is already handled. This patch checks if the PCIe link has trained optimally and if not, does a full PHB reset (to fix the PHY lockup) and retrain. One complication is some devices are known to train degraded unless device specific configuration is performed. Because of this, we only retrain when the device is in a whitelist. All devices in the current whitelist have been testing on a P9DSU/Boston, ZZ and Witherspoon. We always gather information on the link and print it in the logs even if the card is not in the whitelist. For testing purposes, there's an nvram to retry all PCIe cards and all P9 chips when a degraded link is detected. The new option is 'pci-retry-all=true' which can be set using: nvram -p ibm,skiboot --update-config pci-retry-all=true This option may increase the boot time if used on a badly behaving card. Signed-off-by: Michael Neuling [stewart@linux.vnet.ibm.com: fix Cumulus VERS_MAJ r.e. Mikey mail] Signed-off-by: Stewart Smith --- hw/phb4.c | 134 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 133 insertions(+), 1 deletion(-) diff --git a/hw/phb4.c b/hw/phb4.c index 8c24238f8e9b..c469797da422 100644 --- a/hw/phb4.c +++ b/hw/phb4.c @@ -141,6 +141,7 @@ static void phb4_init_hw(struct phb4 *p, bool first_init); static bool verbose_eeh; static bool pci_tracing; static bool pci_eeh_mmio; +static bool pci_retry_all; enum capi_dma_tvt { CAPI_DMA_TVT0, @@ -2261,7 +2262,7 @@ static int64_t phb4_retry_state(struct pci_slot *slot) PHBERR(p, "Electrical link detected but won't train\n"); break; case PHB4_SLOT_LINK_STABLE: - PHBERR(p, "Linked trained but wasn't stable\n"); + PHBERR(p, "Linked trained but was degraded or unstable\n"); break; default: PHBERR(p, "Unknown link issue\n"); @@ -2380,6 +2381,131 @@ static bool phb4_check_reg(struct phb4 *p, uint64_t reg) return true; } +static void phb4_get_info(struct phb *phb, uint16_t bdfn, uint8_t *speed, + uint8_t *width) +{ + int32_t ecap; + uint32_t cap; + + ecap = pci_find_cap(phb, bdfn, PCI_CFG_CAP_ID_EXP); + pci_cfg_read32(phb, bdfn, ecap + PCICAP_EXP_LCAP, &cap); + *width = (cap & PCICAP_EXP_LCAP_MAXWDTH) >> 4; + *speed = cap & PCICAP_EXP_LCAP_MAXSPD; +} + +#define PVR_POWER9_CUMULUS 0x00002000 + +static bool phb4_chip_retry_workaround(void) +{ + unsigned int pvr; + + if (pci_retry_all) + return true; + + /* Chips that need this retry are: + * - CUMULUS DD1.0 + * - NIMBUS DD2.0 and below + */ + pvr = mfspr(SPR_PVR); + if (pvr & PVR_POWER9_CUMULUS) { + if ((PVR_VERS_MAJ(pvr) == 1) && (PVR_VERS_MIN(pvr) == 0)) + return true; + } else { /* NIMBUS */ + if (PVR_VERS_MAJ(pvr) == 1) + return true; + if ((PVR_VERS_MAJ(pvr) == 2) && (PVR_VERS_MIN(pvr) == 0)) + return true; + } + return false; +} + +struct pci_card_id { + uint16_t vendor; + uint16_t device; +}; + +struct pci_card_id retry_whitelist[] = { + { 0x1000, 0x005d }, /* LSI Logic MegaRAID SAS-3 3108 */ + { 0x1000, 0x00c9 }, /* LSI MPT SAS-3 */ + { 0x104c, 0x8241 }, /* TI xHCI USB */ + { 0x1077, 0x2261 }, /* QLogic ISP2722-based 16/32Gb FC */ + { 0x10b5, 0x8725 }, /* PLX Switch: p9dsu, witherspoon */ + { 0x10b5, 0x8748 }, /* PLX Switch: ZZ */ + { 0x11f8, 0xf117 }, /* PMC-Sierra/MicroSemi NV1604 */ + { 0x15b3, 0x1013 }, /* Mellanox CX-4 */ + { 0x15b3, 0x1019 }, /* Mellanox CX-5 */ + { 0x1a03, 0x1150 }, /* ASPEED AST2500 Switch */ + { 0x8086, 0x10fb }, /* Intel x520 10G Eth */ + { 0x9005, 0x028d }, /* MicroSemi PM8069 */ +}; + +#define VENDOR(vdid) ((vdid) & 0xffff) +#define DEVICE(vdid) (((vdid) >> 16) & 0xffff) + +static bool phb4_adapter_in_whitelist(uint32_t vdid) +{ + int i; + + if (pci_retry_all) + return true; + + for (i = 0; i < ARRAY_SIZE(retry_whitelist); i++) + if ((retry_whitelist[i].vendor == VENDOR(vdid)) && + (retry_whitelist[i].device == DEVICE(vdid))) + return true; + + return false; +} + +#define min(x,y) ((x) < (y) ? x : y) + +static bool phb4_link_optimal(struct pci_slot *slot) +{ + struct phb4 *p = phb_to_phb4(slot->phb); + uint32_t vdid; + uint16_t bdfn; + uint8_t trained_speed, phb_speed, dev_speed, target_speed; + uint8_t trained_width, phb_width, dev_width, target_width; + bool optimal_speed, optimal_width, optimal, retry_enabled; + + + /* Current trained state */ + phb4_get_link_info(slot, &trained_speed, &trained_width); + + /* Get PHB capability */ + /* NOTE: phb_speed will account for the software speed limit */ + phb4_get_info(slot->phb, 0, &phb_speed, &phb_width); + + /* Get device capability */ + bdfn = 0x0100; /* bus=1 dev=0 device=0 */ + /* Since this is the first access, we need to wait for CRS */ + if (!pci_wait_crs(slot->phb, bdfn , &vdid)) + return true; + phb4_get_info(slot->phb, bdfn, &dev_speed, &dev_width); + + /* Work out if we are optimally trained */ + target_speed = min(phb_speed, dev_speed); + optimal_speed = (trained_speed >= target_speed); + target_width = min(phb_width, dev_width); + optimal_width = (trained_width >= target_width); + optimal = optimal_width && optimal_speed; + retry_enabled = phb4_chip_retry_workaround() && + phb4_adapter_in_whitelist(vdid); + + PHBDBG(p, "LINK: Card [%04x:%04x] %s Retry:%s\n", VENDOR(vdid), + DEVICE(vdid), optimal ? "Optimal" : "Degraded", + retry_enabled ? "enabled" : "disabled"); + PHBDBG(p, "LINK: Speed Train:GEN%i PHB:GEN%i DEV:GEN%i%s\n", + trained_speed, phb_speed, dev_speed, optimal_speed ? "" : " *"); + PHBDBG(p, "LINK: Width Train:x%02i PHB:x%02i DEV:x%02i%s\n", + trained_width, phb_width, dev_width, optimal_width ? "" : " *"); + + if (!retry_enabled) + return true; + + return optimal; +} + /* * This is a trace function to watch what's happening duing pcie link * training. If any errors are detected it simply returns so the @@ -2494,6 +2620,10 @@ static int64_t phb4_poll_link(struct pci_slot *slot) } if (reg & PHB_PCIE_DLP_TL_LINKACT) { PHBDBG(p, "LINK: Link is stable\n"); + if (!phb4_link_optimal(slot)) { + PHBERR(p, "LINK: Link degraded\n"); + return phb4_retry_state(slot); + } pci_slot_set_state(slot, PHB4_SLOT_NORMAL); return OPAL_SUCCESS; } @@ -4963,6 +5093,8 @@ void probe_phb4(void) pci_tracing = nvram_query_eq("pci-tracing", "true"); pci_eeh_mmio = !nvram_query_eq("pci-eeh-mmio", "disabled"); + pci_retry_all = nvram_query_eq("pci-retry-all", "true"); + /* Look for PBCQ XSCOM nodes */ dt_for_each_compatible(dt_root, np, "ibm,power9-pbcq") phb4_probe_pbcq(np);