diff --git a/drivers/pci/controller/pcie-brcmstb.c b/drivers/pci/controller/pcie-brcmstb.c index 837c9e653d9fa8..c9cc08c5657a3e 100644 --- a/drivers/pci/controller/pcie-brcmstb.c +++ b/drivers/pci/controller/pcie-brcmstb.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -62,6 +63,34 @@ #define PCIE_RC_PL_PHY_CTL_15_DIS_PLL_PD_MASK 0x400000 #define PCIE_RC_PL_PHY_CTL_15_PM_CLK_PERIOD_MASK 0xff +#define PCIE_RC_PL_STATS_CTRL 0x1940 +#define PCIE_RC_PL_STATS_CTRL_EN_MASK 0x1 +#define PCIE_RC_PL_STATS_CTRL_LEN_MASK 0xfffffff0 + +#define PCIE_RC_PL_STATS_TXTLP_LO 0x1944 +#define PCIE_RC_PL_STATS_TXTLP_HI 0x1948 +#define PCIE_RC_PL_STATS_TXDLLP_LO 0x194c +#define PCIE_RC_PL_STATS_TXDLLP_HI 0x1950 +#define PCIE_RC_PL_STATS_RXTLP_LO 0x195c +#define PCIE_RC_PL_STATS_RXTLP_HI 0x1960 +#define PCIE_RC_PL_STATS_RXDLLP_LO 0x1964 +#define PCIE_RC_PL_STATS_RXDLLP_HI 0x1968 +#define PCIE_RC_PL_STATS_RXPL_ERR 0x1974 +#define PCIE_RC_PL_STATS_RXDL_ERR 0x1978 +#define PCIE_RC_PL_STATS_RXTL_ERR 0x197c + +#define PCIE_RC_PL_LTSSM_STATS_3 0x19b0 +#define PCIE_RC_PL_LTSSM_STATS_3_TIME_L0S_MASK 0xffff0000 +#define PCIE_RC_PL_LTSSM_STATS_3_TIME_RECOV_MASK 0x0000ffff + +#define PCIE_RC_PL_LTSSM_STATS_CNT 0x19b4 +#define PCIE_RC_PL_LTSSM_STATS_CNT_L0S_FAIL_MASK 0xffff0000 +#define PCIE_RC_PL_LTSSM_STATS_CNT_RECOV_MASK 0x0000ffff + +#define PCIE_RC_PL_LTSSM_HIST_0 0x1cec +#define PCIE_RC_PL_LTSSM_HIST(n) \ + (PCIE_RC_PL_LTSSM_HIST_0 + ((n) * 4)) + #define PCIE_MISC_MISC_CTRL 0x4008 #define PCIE_MISC_MISC_CTRL_PCIE_RCB_64B_MODE_MASK 0x80 #define PCIE_MISC_MISC_CTRL_PCIE_RCB_MPS_MODE_MASK 0x400 @@ -292,6 +321,91 @@ struct inbound_win { u64 cpu_addr; }; +#define TRACE_BUF_LEN 64 +struct trace_entry { + u8 st; + ktime_t ts; +}; + +static const char *brcm_pcie_decode_ltssm_state(u8 state) +{ + switch (state) { + case 0x01: + return "L0"; + case 0x02: + return "L1"; + case 0x03: + return "L2"; + case 0x04: + return "RxL0s_TxL0s"; + case 0x05: + return "RxL0_TxL0s"; + case 0x09: + return "Polling.Active"; + case 0x0A: + return "Polling.Configuration"; + case 0x0C: + return "Polling.Compliance"; + case 0x10: + return "Configuration.Link"; + case 0x11: + return "Configuration.Linkwidth.Accept"; + case 0x12: + return "Configuration.Lanenum.Wait"; + case 0x13: + return "Confguration.Lanenum.Accept"; + case 0x14: + return "Confguration.Complete"; + case 0x15: + return "Configuration.Idle"; + case 0x20: + return "Recovery.RcvrLock"; + case 0x21: + return "Recovery.RcvrCfg"; + case 0x22: + return "Recovery.Idle"; + case 0x23: + return "Recovery.Speed"; + case 0x24: + return "Recovery.EQ_Phase0"; + case 0x25: + return "Recovery.EQ_Phase1"; + case 0x26: + return "Recovery.EQ_Phase2"; + case 0x27: + return "Recovery.EQ_Phase3"; + case 0x40: + return "Disable"; + case 0x43: + return "Reset"; + case 0x47: + return "Loopback.Entry"; + case 0x48: + return "Loopback.Active"; + case 0x49: + return "Loopback.Exit"; + case 0x4C: + return "Loopback.Master.Entry"; + case 0x4D: + return "Loopback.Master.Active"; + case 0x4E: + return "Loopback.Master.Exit"; + case 0x70: + return "Recovery.Speed_Change"; + case 0x80: + return "Detect.Quiet"; + case 0x81: + return "Detect.Active"; + case 0x82: + return "Detect.Wait"; + case 0x83: + return "Detect.Active2"; + default: + break; + } + return NULL; +}; + /* * The RESCAL block is tied to PCIe controller #1, regardless of the number of * controllers, and turning off PCIe controller #1 prevents access to the RESCAL @@ -362,6 +476,21 @@ struct brcm_pcie { bool ep_wakeup_capable; const struct pcie_cfg_data *cfg; u32 tperst_clk_ms; + bool trace_ltssm; + struct trace_entry *ltssm_trace_buf; + /* Statistics exposed in debugfs */ + struct dentry *debugfs_dir; + u64 tx_tlp; + u64 rx_tlp; + u64 tx_dllp; + u64 rx_dllp; + u32 pl_rx_err; + u32 dl_rx_err; + u32 tl_rx_err; + u16 l0s_exit_time; + u16 recov_time; + u16 l0s_fail_cnt; + u16 recov_cnt; }; static inline bool is_bmips(const struct brcm_pcie *pcie) @@ -1482,6 +1611,133 @@ static void brcm_config_clkreq(struct brcm_pcie *pcie) dev_info(pcie->dev, "clkreq-mode set to %s\n", mode); } +static void brcm_pcie_stats_trigger(struct brcm_pcie *pcie, u32 micros) +{ + u32 tmp; + + /* + * A 0->1 transition on CTRL_EN is required to clear counters and start capture. + * A microseconds count of 0 starts continuous gathering. + */ + tmp = readl(pcie->base + PCIE_RC_PL_STATS_CTRL); + u32p_replace_bits(&tmp, 0, PCIE_RC_PL_STATS_CTRL_EN_MASK); + writel(tmp, pcie->base + PCIE_RC_PL_STATS_CTRL); + + if (micros >= (1 << 28)) + micros = (1 << 28) - 1U; + u32p_replace_bits(&tmp, micros, PCIE_RC_PL_STATS_CTRL_LEN_MASK); + u32p_replace_bits(&tmp, 1, PCIE_RC_PL_STATS_CTRL_EN_MASK); + + writel(tmp, pcie->base + PCIE_RC_PL_STATS_CTRL); +} + +static void brcm_pcie_stats_capture(struct brcm_pcie *pcie) +{ + u32 tmp; + + /* Snapshot the counters - capture engine may still be running */ + pcie->tx_tlp = (u64)readl(pcie->base + PCIE_RC_PL_STATS_TXTLP_LO) + + ((u64)readl(pcie->base + PCIE_RC_PL_STATS_TXTLP_HI) << 32ULL); + pcie->rx_tlp = (u64)readl(pcie->base + PCIE_RC_PL_STATS_RXTLP_LO) + + ((u64)readl(pcie->base + PCIE_RC_PL_STATS_RXTLP_HI) << 32ULL); + pcie->tx_dllp = (u64)readl(pcie->base + PCIE_RC_PL_STATS_TXDLLP_LO) + + ((u64)readl(pcie->base + PCIE_RC_PL_STATS_TXDLLP_HI) << 32ULL); + pcie->rx_dllp = (u64)readl(pcie->base + PCIE_RC_PL_STATS_RXDLLP_LO) + + ((u64)readl(pcie->base + PCIE_RC_PL_STATS_RXDLLP_HI) << 32ULL); + + pcie->pl_rx_err = readl(pcie->base + PCIE_RC_PL_STATS_RXPL_ERR); + pcie->dl_rx_err = readl(pcie->base + PCIE_RC_PL_STATS_RXDL_ERR); + pcie->tl_rx_err = readl(pcie->base + PCIE_RC_PL_STATS_RXTL_ERR); + + tmp = readl(pcie->base + PCIE_RC_PL_LTSSM_STATS_3); + pcie->l0s_exit_time = FIELD_GET(PCIE_RC_PL_LTSSM_STATS_3_TIME_L0S_MASK, tmp); + pcie->recov_time = FIELD_GET(PCIE_RC_PL_LTSSM_STATS_3_TIME_RECOV_MASK, tmp); + + tmp = readl(pcie->base + PCIE_RC_PL_LTSSM_STATS_CNT); + pcie->l0s_fail_cnt = FIELD_GET(PCIE_RC_PL_LTSSM_STATS_CNT_L0S_FAIL_MASK, tmp); + pcie->recov_cnt = FIELD_GET(PCIE_RC_PL_LTSSM_STATS_CNT_RECOV_MASK, tmp); +} + +/* + * Dump the link state machine transitions for the first 100ms after fundamental reset release. + * Most link training completes in a far shorter time. + * + * The CPU-intensive nature of the capture means that this should only be used to + * diagnose fatal link startup failures. + */ +static void brcm_pcie_trace_link_start(struct brcm_pcie *pcie) +{ + struct device *dev = pcie->dev; + struct trace_entry *trace = pcie->ltssm_trace_buf; + int i = 0, j = 0; + u8 cur_state; + u32 ltssm_hist0, ltssm_hist1 = 0; + ktime_t start, timeout; + + start = ktime_get(); + timeout = ktime_add(start, ktime_set(0, NSEC_PER_MSEC * 100)); + /* + * The LTSSM history registers are implemented as an "open FIFO" where register data + * shuffles along with each push - or moves under one's feet, if you prefer. + * We can't atomically read more than one 32bit value (covering 4 entries), so poll + * quickly while guessing the position of the first value we haven't seen yet. + */ + do { + /* + * This delay appears to work around a HW bug where data can temporarily + * appear nibble-shifted. + */ + ndelay(10); + /* Snapshot the FIFO state. Lowest different "byte" is latest data. */ + ltssm_hist0 = readl(pcie->base + PCIE_RC_PL_LTSSM_HIST(3)); + if (ltssm_hist0 == ltssm_hist1) + continue; + ltssm_hist1 = ltssm_hist0; + + /* + * If the "fifo" has changed, we don't know by how much. + * Scan through byte-wise and look for states + */ + for (j = 24; j >= 0; j -= 8) { + cur_state = (ltssm_hist0 >> j) & 0xff; + /* Unassigned entry */ + if (cur_state == 0xff) + continue; + if (i > 0 && trace[i-1].st == cur_state) { + /* + * This is probably what we last saw. + * Next byte should be a new entry. + */ + j -= 8; + break; + } else if (i == 0 && brcm_pcie_decode_ltssm_state(cur_state)) { + /* Probably a new valid entry */ + break; + } + } + + for (; j >= 0 && i < TRACE_BUF_LEN; j -= 8) { + cur_state = (ltssm_hist0 >> j) & 0xff; + trace[i].st = cur_state; + trace[i].ts = ktime_sub(ktime_get(), start); + i++; + } + if (i == TRACE_BUF_LEN) + break; + } while (!ktime_after(ktime_get(), timeout)); + + dev_info(dev, "LTSSM trace captured %d events (max %u):\n", + i, TRACE_BUF_LEN); + for (i = 0; i < TRACE_BUF_LEN; i++) { + if (!trace[i].st) + break; + dev_info(dev, "%llu : %02x - %s\n", + ktime_to_us(trace[i].ts), + trace[i].st, + brcm_pcie_decode_ltssm_state(trace[i].st)); + } +} + static int brcm_pcie_start_link(struct brcm_pcie *pcie) { struct device *dev = pcie->dev; @@ -1495,6 +1751,8 @@ static int brcm_pcie_start_link(struct brcm_pcie *pcie) if (pcie->gen) brcm_pcie_set_gen(pcie, pcie->gen); + brcm_pcie_stats_trigger(pcie, 0); + /* Unassert the fundamental reset */ if (pcie->tperst_clk_ms) { /* @@ -1518,6 +1776,8 @@ static int brcm_pcie_start_link(struct brcm_pcie *pcie) if (ret) return ret; + if (pcie->trace_ltssm) + brcm_pcie_trace_link_start(pcie); /* * Wait for 100ms after PERST# deassertion; see PCIe CEM specification * sections 2.2, PCIe r5.0, 6.6.1. @@ -1554,6 +1814,9 @@ static int brcm_pcie_start_link(struct brcm_pcie *pcie) pci_speed_string(pcie_link_speed[cls]), nlw, ssc_good ? "(SSC)" : "(!SSC)"); + /* Snapshot the boot-time stats */ + brcm_pcie_stats_capture(pcie); + /* * RootCtl bits are reset by perst_n, which undoes pci_enable_crs() * called prior to pci_add_new_bus() during probe. Re-enable here. @@ -1885,6 +2148,7 @@ static int brcm_pcie_resume_noirq(struct device *dev) static void __brcm_pcie_remove(struct brcm_pcie *pcie) { + debugfs_remove_recursive(pcie->debugfs_dir); brcm_msi_remove(pcie); brcm_pcie_turn_off(pcie); if (brcm_phy_stop(pcie)) @@ -2041,6 +2305,98 @@ static struct pci_ops brcm7425_pcie_ops = { .remove_bus = brcm_pcie_remove_bus, }; +static ssize_t debugfs_stats_trigger_write(struct file *filp, + const char __user *buf, + size_t count, loff_t *ppos) +{ + struct seq_file *m = filp->private_data; + struct brcm_pcie *pcie = m->private; + char kbuf[16] = {}; + unsigned long micros; + + if (count > sizeof(kbuf)) + return -EOVERFLOW; + + if (copy_from_user(kbuf, buf, count)) + return -EINVAL; + + if (kstrtol(kbuf, 0, µs) < 0) + return -EINVAL; + + if (micros >= (1 << 28)) + return -ERANGE; + + brcm_pcie_stats_trigger(pcie, micros); + return count; +} + +static int debugfs_stats_trigger_show(struct seq_file *s, void *unused) +{ + struct brcm_pcie *pcie = s->private; + u32 tmp; + + /* Return the state of the capture engine */ + tmp = readl(pcie->base + PCIE_RC_PL_STATS_CTRL); + tmp = FIELD_GET(PCIE_RC_PL_STATS_CTRL_EN_MASK, tmp); + seq_printf(s, "%u\n", tmp); + return 0; +} +DEFINE_SHOW_STORE_ATTRIBUTE(debugfs_stats_trigger); + +static ssize_t debugfs_stats_snapshot_write(struct file *filp, + const char __user *buf, + size_t count, loff_t *ppos) +{ + struct seq_file *m = filp->private_data; + struct brcm_pcie *pcie = m->private; + + /* Any write triggers a snapshot of the stats register set */ + brcm_pcie_stats_capture(pcie); + return count; +} + +static int debugfs_stats_snapshot_show(struct seq_file *s, void *unused) +{ + struct brcm_pcie *pcie = s->private; + + seq_printf(s, "tx_tlp:\t\t%llu\n", pcie->tx_tlp); + seq_printf(s, "rx_tlp:\t\t%llu\n", pcie->rx_tlp); + seq_printf(s, "tx_dllp:\t%llu\n", pcie->tx_dllp); + seq_printf(s, "rx_dllp:\t%llu\n", pcie->rx_dllp); + seq_printf(s, "pl_rx_err:\t%u\n", pcie->pl_rx_err); + seq_printf(s, "dl_rx_err:\t%u\n", pcie->dl_rx_err); + seq_printf(s, "tl_rx_err:\t%u\n", pcie->tl_rx_err); + seq_printf(s, "l0s_exit_time:\t%u\n", pcie->l0s_exit_time); + seq_printf(s, "recov_time:\t%u\n", pcie->recov_time); + seq_printf(s, "l0s_fail_cnt\t%u\n", pcie->l0s_fail_cnt); + seq_printf(s, "recov_cnt:\t%u\n", pcie->recov_cnt); + + return 0; +} +DEFINE_SHOW_STORE_ATTRIBUTE(debugfs_stats_snapshot); + +static void brcm_pcie_init_debugfs(struct brcm_pcie *pcie) +{ + char *name; + + name = devm_kasprintf(pcie->dev, GFP_KERNEL, "%pOFP", pcie->dev->of_node); + if (!name) + return; + + pcie->debugfs_dir = debugfs_create_dir(name, NULL); + if (!pcie->debugfs_dir) + return; + + debugfs_create_file("stats_snapshot", 0644, pcie->debugfs_dir, pcie, + &debugfs_stats_snapshot_fops); + debugfs_create_file("stats_trigger", 0644, pcie->debugfs_dir, pcie, + &debugfs_stats_trigger_fops); +} + +static bool trace_ltssm; +module_param(trace_ltssm, bool, 0444); +MODULE_PARM_DESC(trace_ltssm, "Capture and dump link states during link training"); + static int brcm_pcie_probe(struct platform_device *pdev) { struct device_node *msi_np __free(device_node) = NULL; @@ -2064,6 +2420,19 @@ static int brcm_pcie_probe(struct platform_device *pdev) pcie->dev = &pdev->dev; pcie->np = np; pcie->cfg = data; + pcie->trace_ltssm = trace_ltssm; + + brcm_pcie_init_debugfs(pcie); + + if (pcie->trace_ltssm) { + pcie->ltssm_trace_buf = devm_kzalloc(&pdev->dev, + sizeof(struct trace_entry) * TRACE_BUF_LEN, + GFP_KERNEL); + if (!pcie->ltssm_trace_buf) { + dev_err(&pdev->dev, "could not allocate trace buffer\n"); + return -ENOMEM; + } + } pcie->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(pcie->base))