From d579686310413d32b680155c84c9be4aa12f465a Mon Sep 17 00:00:00 2001 From: Jonathan Bell Date: Fri, 10 Dec 2021 16:29:49 +0000 Subject: [PATCH 1/3] Revert "xhci: add a quirk to work around a suspected cache bug on VLI controllers" This reverts commit a1d0f808d4a0b7f7053095cd4ab97a4276bed9ff. --- drivers/usb/host/xhci-mem.c | 7 +------ drivers/usb/host/xhci-pci.c | 1 - drivers/usb/host/xhci.h | 1 - 3 files changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index f6507be7ccd809..0d336f7451c74e 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -1445,7 +1445,6 @@ int xhci_endpoint_init(struct xhci_hcd *xhci, unsigned int mult; unsigned int avg_trb_len; unsigned int err_count = 0; - unsigned int nsegs = 2; ep_index = xhci_get_endpoint_index(&ep->desc); ep_ctx = xhci_get_ep_ctx(xhci, virt_dev->in_ctx, ep_index); @@ -1456,10 +1455,6 @@ int xhci_endpoint_init(struct xhci_hcd *xhci, ring_type = usb_endpoint_type(&ep->desc); - if (xhci->quirks & XHCI_VLI_TRB_CACHE_BUG && - udev->speed != USB_SPEED_SUPER) { - nsegs = 1; - } /* * Get values to fill the endpoint context, mostly from ep descriptor. * The average TRB buffer lengt for bulk endpoints is unclear as we @@ -1507,7 +1502,7 @@ int xhci_endpoint_init(struct xhci_hcd *xhci, /* Set up the endpoint ring */ virt_dev->eps[ep_index].new_ring = - xhci_ring_alloc(xhci, nsegs, 1, ring_type, max_packet, mem_flags); + xhci_ring_alloc(xhci, 2, 1, ring_type, max_packet, mem_flags); if (!virt_dev->eps[ep_index].new_ring) return -ENOMEM; diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 8de4ed0ae4eba6..0880f5a42726f0 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -287,7 +287,6 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) xhci->quirks |= XHCI_LPM_SUPPORT; xhci->quirks |= XHCI_EP_CTX_BROKEN_DCS; xhci->quirks |= XHCI_AVOID_DQ_ON_LINK; - xhci->quirks |= XHCI_VLI_TRB_CACHE_BUG; } if (pdev->vendor == PCI_VENDOR_ID_ASMEDIA && diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index c47f756339e51a..38650e4a99d2a2 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1886,7 +1886,6 @@ struct xhci_hcd { #define XHCI_NO_SOFT_RETRY BIT_ULL(40) #define XHCI_EP_CTX_BROKEN_DCS BIT_ULL(41) #define XHCI_AVOID_DQ_ON_LINK BIT_ULL(42) -#define XHCI_VLI_TRB_CACHE_BUG BIT_ULL(43) unsigned int num_active_eps; unsigned int limit_active_eps; From f157c81ff1f27b596edd955f1b1dfa10a643a580 Mon Sep 17 00:00:00 2001 From: Jonathan Bell Date: Mon, 13 Dec 2021 15:05:56 +0000 Subject: [PATCH 2/3] xhci: refactor out TRBS_PER_SEGMENT define in runtime code In anticipation of adjusting the number of utilised TRBs in a ring segment, add trbs_per_seg to struct xhci_ring and use this instead of a compile-time define. Signed-off-by: Jonathan Bell --- drivers/usb/host/xhci-mem.c | 48 +++++++++++++++++++----------------- drivers/usb/host/xhci-ring.c | 12 +++++---- drivers/usb/host/xhci.c | 6 ++--- drivers/usb/host/xhci.h | 1 + 4 files changed, 37 insertions(+), 30 deletions(-) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index 0d336f7451c74e..84128c5d24e55f 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -98,6 +98,7 @@ static void xhci_free_segments_for_ring(struct xhci_hcd *xhci, */ static void xhci_link_segments(struct xhci_segment *prev, struct xhci_segment *next, + unsigned int trbs_per_seg, enum xhci_ring_type type, bool chain_links) { u32 val; @@ -106,16 +107,16 @@ static void xhci_link_segments(struct xhci_segment *prev, return; prev->next = next; if (type != TYPE_EVENT) { - prev->trbs[TRBS_PER_SEGMENT-1].link.segment_ptr = + prev->trbs[trbs_per_seg - 1].link.segment_ptr = cpu_to_le64(next->dma); /* Set the last TRB in the segment to have a TRB type ID of Link TRB */ - val = le32_to_cpu(prev->trbs[TRBS_PER_SEGMENT-1].link.control); + val = le32_to_cpu(prev->trbs[trbs_per_seg - 1].link.control); val &= ~TRB_TYPE_BITMASK; val |= TRB_TYPE(TRB_LINK); if (chain_links) val |= TRB_CHAIN; - prev->trbs[TRBS_PER_SEGMENT-1].link.control = cpu_to_le32(val); + prev->trbs[trbs_per_seg - 1].link.control = cpu_to_le32(val); } } @@ -139,15 +140,17 @@ static void xhci_link_rings(struct xhci_hcd *xhci, struct xhci_ring *ring, (xhci->quirks & XHCI_AMD_0x96_HOST))); next = ring->enq_seg->next; - xhci_link_segments(ring->enq_seg, first, ring->type, chain_links); - xhci_link_segments(last, next, ring->type, chain_links); + xhci_link_segments(ring->enq_seg, first, ring->trbs_per_seg, + ring->type, chain_links); + xhci_link_segments(last, next, ring->trbs_per_seg, + ring->type, chain_links); ring->num_segs += num_segs; - ring->num_trbs_free += (TRBS_PER_SEGMENT - 1) * num_segs; + ring->num_trbs_free += (ring->trbs_per_seg - 1) * num_segs; if (ring->type != TYPE_EVENT && ring->enq_seg == ring->last_seg) { - ring->last_seg->trbs[TRBS_PER_SEGMENT-1].link.control + ring->last_seg->trbs[ring->trbs_per_seg - 1].link.control &= ~cpu_to_le32(LINK_TOGGLE); - last->trbs[TRBS_PER_SEGMENT-1].link.control + last->trbs[ring->trbs_per_seg - 1].link.control |= cpu_to_le32(LINK_TOGGLE); ring->last_seg = last; } @@ -314,14 +317,15 @@ void xhci_initialize_ring_info(struct xhci_ring *ring, * Each segment has a link TRB, and leave an extra TRB for SW * accounting purpose */ - ring->num_trbs_free = ring->num_segs * (TRBS_PER_SEGMENT - 1) - 1; + ring->num_trbs_free = ring->num_segs * (ring->trbs_per_seg - 1) - 1; } /* Allocate segments and link them for a ring */ static int xhci_alloc_segments_for_ring(struct xhci_hcd *xhci, struct xhci_segment **first, struct xhci_segment **last, - unsigned int num_segs, unsigned int cycle_state, - enum xhci_ring_type type, unsigned int max_packet, gfp_t flags) + unsigned int num_segs, unsigned int trbs_per_seg, + unsigned int cycle_state, enum xhci_ring_type type, + unsigned int max_packet, gfp_t flags) { struct xhci_segment *prev; bool chain_links; @@ -350,12 +354,12 @@ static int xhci_alloc_segments_for_ring(struct xhci_hcd *xhci, } return -ENOMEM; } - xhci_link_segments(prev, next, type, chain_links); + xhci_link_segments(prev, next, trbs_per_seg, type, chain_links); prev = next; num_segs--; } - xhci_link_segments(prev, *first, type, chain_links); + xhci_link_segments(prev, *first, trbs_per_seg, type, chain_links); *last = prev; return 0; @@ -387,16 +391,17 @@ struct xhci_ring *xhci_ring_alloc(struct xhci_hcd *xhci, if (num_segs == 0) return ring; + ring->trbs_per_seg = TRBS_PER_SEGMENT; ret = xhci_alloc_segments_for_ring(xhci, &ring->first_seg, - &ring->last_seg, num_segs, cycle_state, type, - max_packet, flags); + &ring->last_seg, num_segs, ring->trbs_per_seg, + cycle_state, type, max_packet, flags); if (ret) goto fail; /* Only event ring does not use link TRB */ if (type != TYPE_EVENT) { /* See section 4.9.2.1 and 6.4.4.1 */ - ring->last_seg->trbs[TRBS_PER_SEGMENT - 1].link.control |= + ring->last_seg->trbs[ring->trbs_per_seg - 1].link.control |= cpu_to_le32(LINK_TOGGLE); } xhci_initialize_ring_info(ring, cycle_state); @@ -429,16 +434,15 @@ int xhci_ring_expansion(struct xhci_hcd *xhci, struct xhci_ring *ring, unsigned int num_segs_needed; int ret; - num_segs_needed = (num_trbs + (TRBS_PER_SEGMENT - 1) - 1) / - (TRBS_PER_SEGMENT - 1); - + num_segs_needed = (num_trbs + (ring->trbs_per_seg - 1) - 1) / + (ring->trbs_per_seg - 1); /* Allocate number of segments we needed, or double the ring size */ num_segs = ring->num_segs > num_segs_needed ? ring->num_segs : num_segs_needed; ret = xhci_alloc_segments_for_ring(xhci, &first, &last, - num_segs, ring->cycle_state, ring->type, - ring->bounce_buf_len, flags); + num_segs, ring->trbs_per_seg, ring->cycle_state, + ring->type, ring->bounce_buf_len, flags); if (ret) return -ENOMEM; @@ -1825,7 +1829,7 @@ int xhci_alloc_erst(struct xhci_hcd *xhci, for (val = 0; val < evt_ring->num_segs; val++) { entry = &erst->entries[val]; entry->seg_addr = cpu_to_le64(seg->dma); - entry->seg_size = cpu_to_le32(TRBS_PER_SEGMENT); + entry->seg_size = cpu_to_le32(evt_ring->trbs_per_seg); entry->rsvd = 0; seg = seg->next; } diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 44e222d7804f74..78e154891f752e 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -87,15 +87,16 @@ static bool trb_is_link(union xhci_trb *trb) return TRB_TYPE_LINK_LE32(trb->link.control); } -static bool last_trb_on_seg(struct xhci_segment *seg, union xhci_trb *trb) +static bool last_trb_on_seg(struct xhci_segment *seg, + unsigned int trbs_per_seg, union xhci_trb *trb) { - return trb == &seg->trbs[TRBS_PER_SEGMENT - 1]; + return trb == &seg->trbs[trbs_per_seg - 1]; } static bool last_trb_on_ring(struct xhci_ring *ring, struct xhci_segment *seg, union xhci_trb *trb) { - return last_trb_on_seg(seg, trb) && (seg->next == ring->first_seg); + return last_trb_on_seg(seg, ring->trbs_per_seg, trb) && (seg->next == ring->first_seg); } static bool link_trb_toggles_cycle(union xhci_trb *trb) @@ -157,7 +158,8 @@ void inc_deq(struct xhci_hcd *xhci, struct xhci_ring *ring) { /* event ring doesn't have link trbs, check for last trb */ if (ring->type == TYPE_EVENT) { - if (!last_trb_on_seg(ring->deq_seg, ring->dequeue)) { + if (!last_trb_on_seg(ring->deq_seg, ring->trbs_per_seg, + ring->dequeue)) { ring->dequeue++; goto out; } @@ -2976,7 +2978,7 @@ irqreturn_t xhci_irq(struct usb_hcd *hcd) * that clears the EHB. */ while (xhci_handle_event(xhci) > 0) { - if (event_loop++ < TRBS_PER_SEGMENT / 2) + if (event_loop++ < xhci->event_ring->trbs_per_seg / 2) continue; xhci_update_erst_dequeue(xhci, event_ring_deq); event_loop = 0; diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 975e24e94e9196..34714c781a14af 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -858,8 +858,8 @@ static void xhci_clear_command_ring(struct xhci_hcd *xhci) seg = ring->deq_seg; do { memset(seg->trbs, 0, - sizeof(union xhci_trb) * (TRBS_PER_SEGMENT - 1)); - seg->trbs[TRBS_PER_SEGMENT - 1].link.control &= + sizeof(union xhci_trb) * (ring->trbs_per_seg - 1)); + seg->trbs[ring->trbs_per_seg - 1].link.control &= cpu_to_le32(~TRB_CYCLE); seg = seg->next; } while (seg != ring->deq_seg); @@ -870,7 +870,7 @@ static void xhci_clear_command_ring(struct xhci_hcd *xhci) ring->enq_seg = ring->deq_seg; ring->enqueue = ring->dequeue; - ring->num_trbs_free = ring->num_segs * (TRBS_PER_SEGMENT - 1) - 1; + ring->num_trbs_free = ring->num_segs * (ring->trbs_per_seg - 1) - 1; /* * Ring is now zeroed, so the HW should look for change of ownership * when the cycle bit is set to 1. diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 38650e4a99d2a2..4f6fcd7cfc1183 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1615,6 +1615,7 @@ struct xhci_ring { unsigned int num_trbs_free; unsigned int num_trbs_free_temp; unsigned int bounce_buf_len; + unsigned int trbs_per_seg; enum xhci_ring_type type; bool last_td_was_short; struct radix_tree_root *trb_address_map; From 87bc6b721901e0e100d8f92c198c6df3d25178b5 Mon Sep 17 00:00:00 2001 From: Jonathan Bell Date: Mon, 13 Dec 2021 16:04:03 +0000 Subject: [PATCH 3/3] usb: xhci: add VLI_TRB_CACHE_BUG quirk The VL805 fetches up to 4 transfer TRBs at a time. TRB reads don't cross a 64B boundary, and if a TRB is fetched and is not on a 64B boundary, the read is sized up to the next 64B boundary. However the VL805 implements a readahead prefetch for TRBs on a transfer ring. This fetches the next 64B after any TRB read has happened. Near the end of a ring segment, the prefetcher can read the first 64B of the next page in physical memory and this is where the behaviour causes a bug. The controller does not tag reads with which endpoint they are for, so if the start of the next page is a ring segment used by a victim endpoint, and the victim endpoint is about to fetch TRBs from the start of the segment, the victim endpoint will read from the prefetched data and not perform a read to main memory. If the data is stale, the ring cycle state bit may not be correct and the endpoint will silently halt. Adjust trbs_per_seg for transfer rings allocated for this controller. See https://github.com/raspberrypi/linux/issues/4685 Signed-off-by: Jonathan Bell --- drivers/usb/host/xhci-mem.c | 11 +++++++++++ drivers/usb/host/xhci-pci.c | 1 + drivers/usb/host/xhci.h | 1 + 3 files changed, 13 insertions(+) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index 84128c5d24e55f..562f0f7f6e15c2 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -392,6 +392,17 @@ struct xhci_ring *xhci_ring_alloc(struct xhci_hcd *xhci, return ring; ring->trbs_per_seg = TRBS_PER_SEGMENT; + /* + * The Via VL805 has a bug where cache readahead will fetch off the end + * of a page if the Link TRB of a transfer ring is in the last 4 slots. + * Where there are consecutive physical pages containing ring segments, + * this can cause a desync between the controller's view of a ring + * and the host. + */ + if (xhci->quirks & XHCI_VLI_TRB_CACHE_BUG && + type != TYPE_EVENT && type != TYPE_COMMAND) + ring->trbs_per_seg -= 4; + ret = xhci_alloc_segments_for_ring(xhci, &ring->first_seg, &ring->last_seg, num_segs, ring->trbs_per_seg, cycle_state, type, max_packet, flags); diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 0880f5a42726f0..8de4ed0ae4eba6 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -287,6 +287,7 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) xhci->quirks |= XHCI_LPM_SUPPORT; xhci->quirks |= XHCI_EP_CTX_BROKEN_DCS; xhci->quirks |= XHCI_AVOID_DQ_ON_LINK; + xhci->quirks |= XHCI_VLI_TRB_CACHE_BUG; } if (pdev->vendor == PCI_VENDOR_ID_ASMEDIA && diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 4f6fcd7cfc1183..eae8ada4be46de 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1887,6 +1887,7 @@ struct xhci_hcd { #define XHCI_NO_SOFT_RETRY BIT_ULL(40) #define XHCI_EP_CTX_BROKEN_DCS BIT_ULL(41) #define XHCI_AVOID_DQ_ON_LINK BIT_ULL(42) +#define XHCI_VLI_TRB_CACHE_BUG BIT_ULL(43) unsigned int num_active_eps; unsigned int limit_active_eps;