IB/hfi1: Fix bugs with non-PAGE_SIZE-end multi-iovec user SDMA requests
[ Upstream commit 00cbce5 ]

hfi1 user SDMA request processing has two bugs that can cause data
corruption for user SDMA requests that have multiple payload iovecs
where an iovec other than the tail iovec does not run up to the page
boundary for the buffer pointed to by that iovec.

Here are the specific bugs (a sketch of the corrected handling follows
this list):
1. user_sdma_txadd() does not use struct user_sdma_iovec->iov.iov_len.
   Rather, user_sdma_txadd() will add up to PAGE_SIZE bytes from the
   iovec to the packet, even if some of those bytes are past
   iovec->iov.iov_len and are thus not intended to be in the packet.
2. user_sdma_txadd() and user_sdma_send_pkts() fail to advance to the
   next iovec in user_sdma_request->iovs when the current iovec does not
   end on a PAGE_SIZE boundary and does not contain enough data to
   complete the packet. The transmitted packet will contain the wrong
   data from the iovec pages.
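
For illustration, a minimal, self-contained sketch (not hfi1 driver code;
the sketch_* names and struct are made up) of the packet-fill invariant
these two fixes establish in user_sdma_txadd()/user_sdma_send_pkts():
each chunk is clamped to the iovec's iov_len rather than to PAGE_SIZE,
and the request advances to the next iovec as soon as the current one is
exhausted, even when it ends mid-page.

#include <stddef.h>
#include <sys/uio.h>

/* Hypothetical stand-in for the driver's per-request iovec cursor. */
struct sketch_request {
	const struct iovec *iovs;	/* payload iovecs */
	size_t niovs;			/* number of payload iovecs */
	size_t iov_idx;			/* current iovec */
	size_t iov_offset;		/* bytes already consumed from it */
};

/* Fill one packet's payload; returns the number of bytes consumed. */
static size_t sketch_fill_packet(struct sketch_request *req, size_t pkt_len)
{
	size_t filled = 0;

	while (filled < pkt_len && req->iov_idx < req->niovs) {
		const struct iovec *iov = &req->iovs[req->iov_idx];
		size_t iov_remaining = iov->iov_len - req->iov_offset;
		size_t chunk = pkt_len - filled;

		/* Clamp to iov_len, not to PAGE_SIZE (bug 1). */
		if (chunk > iov_remaining)
			chunk = iov_remaining;

		/* ... queue 'chunk' bytes at iov->iov_base + req->iov_offset ... */
		req->iov_offset += chunk;
		filled += chunk;

		/* Advance even when the iovec ends mid-page (bug 2). */
		if (req->iov_offset == iov->iov_len) {
			req->iov_idx++;
			req->iov_offset = 0;
		}
	}
	return filled;
}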

This has not been an issue with SDMA packets from hfi1 Verbs or PSM2
because they only produce iovecs that end short of PAGE_SIZE as the tail
iovec of an SDMA request.

Fixing these bugs exposes other bugs with the SDMA pin cache
(struct mmu_rb_handler) that get in the way of supporting user SDMA requests
with multiple payload iovecs whose buffers do not end at PAGE_SIZE. So
this commit fixes those issues as well.

Here are the mmu_rb_handler bugs that non-PAGE_SIZE-end multi-iovec
payload user SDMA requests can hit (a sketch of the refcount fix for
bugs 3 and 4 follows this list):
1. Overlapping memory ranges in mmu_rb_handler will result in duplicate
   pinnings.
2. When extending an existing mmu_rb_handler entry (struct mmu_rb_node),
   the mmu_rb code (1) removes the existing entry under a lock, (2)
   releases that lock, pins the new pages, (3) then reacquires the lock
   to insert the extended mmu_rb_node.

   If someone else comes in and inserts an overlapping entry between (2)
   and (3), the insert in (3) will fail.

   The failure path code in this case unpins _all_ pages in either the
   original mmu_rb_node or the new mmu_rb_node that was inserted between
   (2) and (3).
3. In hfi1_mmu_rb_remove_unless_exact(), mmu_rb_node->refcount is
   incremented outside of mmu_rb_handler->lock. As a result, mmu_rb_node
   could be evicted by another thread that gets mmu_rb_handler->lock and
   checks mmu_rb_node->refcount before mmu_rb_node->refcount is
   incremented.
4. Related to #2 above, the SDMA request submission failure path does not
   check mmu_rb_node->refcount before freeing mmu_rb_node object.

   If there are other SDMA requests in progress whose iovecs have
   pointers to the now-freed mmu_rb_node(s), those pointers will be
   dereferenced when those SDMA requests complete.
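
Bugs 3 and 4 both stem from taking a node reference outside
mmu_rb_handler->lock. The mmu_rb.c change below replaces
hfi1_mmu_rb_remove_unless_exact() with hfi1_mmu_rb_get_first(), which must
be called with the handler lock held, so the caller can take its reference
before the lock is dropped. A simplified sketch of that pattern follows;
sketch_node_get() is a hypothetical stand-in for the caller's refcount
bump, and the real caller lives in user_sdma.c, which is not shown in this
excerpt.

/*
 * Sketch only -- not the commit's user_sdma.c code. Lookup and reference
 * acquisition happen inside one handler->lock critical section, so an
 * evictor that also takes handler->lock can never see a zero refcount
 * between the lookup and the reference grab.
 */
static struct mmu_rb_node *sketch_find_and_ref(struct mmu_rb_handler *handler,
					       unsigned long addr,
					       unsigned long len)
{
	struct mmu_rb_node *node;
	unsigned long flags;

	spin_lock_irqsave(&handler->lock, flags);
	node = hfi1_mmu_rb_get_first(handler, addr, len);
	if (node)
		sketch_node_get(node);	/* hypothetical refcount increment */
	spin_unlock_irqrestore(&handler->lock, flags);

	return node;
}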

Fixes: 7be8567 ("IB/hfi1: Don't remove RB entry when not needed.")
Fixes: 7724105 ("IB/hfi1: add driver files")
Signed-off-by: Brendan Cunningham <bcunningham@cornelisnetworks.com>
Signed-off-by: Patrick Kelsey <pat.kelsey@cornelisnetworks.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@cornelisnetworks.com>
Link: https://lore.kernel.org/r/168088636445.3027109.10054635277810177889.stgit@252.162.96.66.static.eigbox.net
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
Patrick Kelsey authored and gregkh committed May 11, 2023
1 parent 6bbc496 commit a2bd706
Showing 11 changed files with 423 additions and 304 deletions.
1 change: 1 addition & 0 deletions drivers/infiniband/hw/hfi1/ipoib_tx.c
@@ -251,6 +251,7 @@ static int hfi1_ipoib_build_ulp_payload(struct ipoib_txreq *tx,
const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

ret = sdma_txadd_page(dd,
NULL,
txreq,
skb_frag_page(frag),
frag->bv_offset,
66 changes: 14 additions & 52 deletions drivers/infiniband/hw/hfi1/mmu_rb.c
@@ -126,7 +126,7 @@ int hfi1_mmu_rb_insert(struct mmu_rb_handler *handler,
spin_lock_irqsave(&handler->lock, flags);
node = __mmu_rb_search(handler, mnode->addr, mnode->len);
if (node) {
ret = -EINVAL;
ret = -EEXIST;
goto unlock;
}
__mmu_int_rb_insert(mnode, &handler->root);
@@ -143,6 +143,19 @@ int hfi1_mmu_rb_insert(struct mmu_rb_handler *handler,
return ret;
}

/* Caller must hold handler lock */
struct mmu_rb_node *hfi1_mmu_rb_get_first(struct mmu_rb_handler *handler,
unsigned long addr, unsigned long len)
{
struct mmu_rb_node *node;

trace_hfi1_mmu_rb_search(addr, len);
node = __mmu_int_rb_iter_first(&handler->root, addr, (addr + len) - 1);
if (node)
list_move_tail(&node->list, &handler->lru_list);
return node;
}

/* Caller must hold handler lock */
static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler,
unsigned long addr,
@@ -167,34 +180,6 @@ static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler,
return node;
}

bool hfi1_mmu_rb_remove_unless_exact(struct mmu_rb_handler *handler,
unsigned long addr, unsigned long len,
struct mmu_rb_node **rb_node)
{
struct mmu_rb_node *node;
unsigned long flags;
bool ret = false;

if (current->mm != handler->mn.mm)
return ret;

spin_lock_irqsave(&handler->lock, flags);
node = __mmu_rb_search(handler, addr, len);
if (node) {
if (node->addr == addr && node->len == len) {
list_move_tail(&node->list, &handler->lru_list);
goto unlock;
}
__mmu_int_rb_remove(node, &handler->root);
list_del(&node->list); /* remove from LRU list */
ret = true;
}
unlock:
spin_unlock_irqrestore(&handler->lock, flags);
*rb_node = node;
return ret;
}

void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg)
{
struct mmu_rb_node *rbnode, *ptr;
@@ -225,29 +210,6 @@ void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg)
}
}

/*
* It is up to the caller to ensure that this function does not race with the
* mmu invalidate notifier which may be calling the users remove callback on
* 'node'.
*/
void hfi1_mmu_rb_remove(struct mmu_rb_handler *handler,
struct mmu_rb_node *node)
{
unsigned long flags;

if (current->mm != handler->mn.mm)
return;

/* Validity of handler and node pointers has been checked by caller. */
trace_hfi1_mmu_rb_remove(node->addr, node->len);
spin_lock_irqsave(&handler->lock, flags);
__mmu_int_rb_remove(node, &handler->root);
list_del(&node->list); /* remove from LRU list */
spin_unlock_irqrestore(&handler->lock, flags);

handler->ops->remove(handler->ops_arg, node);
}

static int mmu_notifier_range_start(struct mmu_notifier *mn,
const struct mmu_notifier_range *range)
{
8 changes: 3 additions & 5 deletions drivers/infiniband/hw/hfi1/mmu_rb.h
@@ -52,10 +52,8 @@ void hfi1_mmu_rb_unregister(struct mmu_rb_handler *handler);
int hfi1_mmu_rb_insert(struct mmu_rb_handler *handler,
struct mmu_rb_node *mnode);
void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg);
void hfi1_mmu_rb_remove(struct mmu_rb_handler *handler,
struct mmu_rb_node *mnode);
bool hfi1_mmu_rb_remove_unless_exact(struct mmu_rb_handler *handler,
unsigned long addr, unsigned long len,
struct mmu_rb_node **rb_node);
struct mmu_rb_node *hfi1_mmu_rb_get_first(struct mmu_rb_handler *handler,
unsigned long addr,
unsigned long len);

#endif /* _HFI1_MMU_RB_H */
21 changes: 4 additions & 17 deletions drivers/infiniband/hw/hfi1/sdma.c
@@ -1593,22 +1593,7 @@ static inline void sdma_unmap_desc(
struct hfi1_devdata *dd,
struct sdma_desc *descp)
{
switch (sdma_mapping_type(descp)) {
case SDMA_MAP_SINGLE:
dma_unmap_single(
&dd->pcidev->dev,
sdma_mapping_addr(descp),
sdma_mapping_len(descp),
DMA_TO_DEVICE);
break;
case SDMA_MAP_PAGE:
dma_unmap_page(
&dd->pcidev->dev,
sdma_mapping_addr(descp),
sdma_mapping_len(descp),
DMA_TO_DEVICE);
break;
}
system_descriptor_complete(dd, descp);
}

/*
@@ -3128,7 +3113,7 @@ int ext_coal_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx,

/* Add descriptor for coalesce buffer */
tx->desc_limit = MAX_DESC;
return _sdma_txadd_daddr(dd, SDMA_MAP_SINGLE, tx,
return _sdma_txadd_daddr(dd, SDMA_MAP_SINGLE, NULL, tx,
addr, tx->tlen);
}

@@ -3167,10 +3152,12 @@ int _pad_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx)
return rval;
}
}

/* finish the one just added */
make_tx_sdma_desc(
tx,
SDMA_MAP_NONE,
NULL,
dd->sdma_pad_phys,
sizeof(u32) - (tx->packet_len & (sizeof(u32) - 1)));
tx->num_desc++;
16 changes: 11 additions & 5 deletions drivers/infiniband/hw/hfi1/sdma.h
@@ -594,6 +594,7 @@ static inline dma_addr_t sdma_mapping_addr(struct sdma_desc *d)
static inline void make_tx_sdma_desc(
struct sdma_txreq *tx,
int type,
void *pinning_ctx,
dma_addr_t addr,
size_t len)
{
@@ -612,6 +613,7 @@ static inline void make_tx_sdma_desc(
<< SDMA_DESC0_PHY_ADDR_SHIFT) |
(((u64)len & SDMA_DESC0_BYTE_COUNT_MASK)
<< SDMA_DESC0_BYTE_COUNT_SHIFT);
desc->pinning_ctx = pinning_ctx;
}

/* helper to extend txreq */
@@ -643,6 +645,7 @@ static inline void _sdma_close_tx(struct hfi1_devdata *dd,
static inline int _sdma_txadd_daddr(
struct hfi1_devdata *dd,
int type,
void *pinning_ctx,
struct sdma_txreq *tx,
dma_addr_t addr,
u16 len)
@@ -652,6 +655,7 @@ static inline int _sdma_txadd_daddr(
make_tx_sdma_desc(
tx,
type,
pinning_ctx,
addr, len);
WARN_ON(len > tx->tlen);
tx->num_desc++;
@@ -672,6 +676,7 @@ static inline int _sdma_txadd_daddr(
/**
* sdma_txadd_page() - add a page to the sdma_txreq
* @dd: the device to use for mapping
* @pinning_ctx: context to be released at descriptor retirement
* @tx: tx request to which the page is added
* @page: page to map
* @offset: offset within the page
@@ -687,6 +692,7 @@ static inline int _sdma_txadd_daddr(
*/
static inline int sdma_txadd_page(
struct hfi1_devdata *dd,
void *pinning_ctx,
struct sdma_txreq *tx,
struct page *page,
unsigned long offset,
@@ -714,8 +720,7 @@ static inline int sdma_txadd_page(
return -ENOSPC;
}

return _sdma_txadd_daddr(
dd, SDMA_MAP_PAGE, tx, addr, len);
return _sdma_txadd_daddr(dd, SDMA_MAP_PAGE, pinning_ctx, tx, addr, len);
}

/**
@@ -749,7 +754,8 @@ static inline int sdma_txadd_daddr(
return rval;
}

return _sdma_txadd_daddr(dd, SDMA_MAP_NONE, tx, addr, len);
return _sdma_txadd_daddr(dd, SDMA_MAP_NONE, NULL, tx,
addr, len);
}

/**
@@ -795,8 +801,7 @@ static inline int sdma_txadd_kvaddr(
return -ENOSPC;
}

return _sdma_txadd_daddr(
dd, SDMA_MAP_SINGLE, tx, addr, len);
return _sdma_txadd_daddr(dd, SDMA_MAP_SINGLE, NULL, tx, addr, len);
}

struct iowait_work;
@@ -1030,4 +1035,5 @@ extern uint mod_num_sdma;

void sdma_update_lmc(struct hfi1_devdata *dd, u64 mask, u32 lid);

void system_descriptor_complete(struct hfi1_devdata *dd, struct sdma_desc *descp);
#endif
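
The sdma.h changes above thread a new pinning_ctx pointer from
sdma_txadd_page() and _sdma_txadd_daddr() into each descriptor (see
sdma_txreq.h below), while sdma_unmap_desc() in sdma.c now defers all
per-descriptor cleanup to system_descriptor_complete(). Below is a hedged
sketch of what that hook presumably does, reconstructed from the code
removed from sdma_unmap_desc(); sketch_release_pinning() is a hypothetical
stand-in for the release logic in user_sdma.c, which is not part of this
excerpt.

/* Sketch of the retirement hook; the actual body lives in user_sdma.c. */
void system_descriptor_complete(struct hfi1_devdata *dd, struct sdma_desc *descp)
{
	switch (sdma_mapping_type(descp)) {
	case SDMA_MAP_SINGLE:
		dma_unmap_single(&dd->pcidev->dev, sdma_mapping_addr(descp),
				 sdma_mapping_len(descp), DMA_TO_DEVICE);
		break;
	case SDMA_MAP_PAGE:
		dma_unmap_page(&dd->pcidev->dev, sdma_mapping_addr(descp),
			       sdma_mapping_len(descp), DMA_TO_DEVICE);
		break;
	}

	/* Drop the page-pin reference recorded when the descriptor was built. */
	if (descp->pinning_ctx)
		sketch_release_pinning(descp->pinning_ctx);	/* hypothetical */
}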
1 change: 1 addition & 0 deletions drivers/infiniband/hw/hfi1/sdma_txreq.h
@@ -19,6 +19,7 @@
struct sdma_desc {
/* private: don't use directly */
u64 qw[2];
void *pinning_ctx;
};

/**
4 changes: 0 additions & 4 deletions drivers/infiniband/hw/hfi1/trace_mmu.h
@@ -37,10 +37,6 @@ DEFINE_EVENT(hfi1_mmu_rb_template, hfi1_mmu_rb_search,
TP_PROTO(unsigned long addr, unsigned long len),
TP_ARGS(addr, len));

DEFINE_EVENT(hfi1_mmu_rb_template, hfi1_mmu_rb_remove,
TP_PROTO(unsigned long addr, unsigned long len),
TP_ARGS(addr, len));

DEFINE_EVENT(hfi1_mmu_rb_template, hfi1_mmu_mem_invalidate,
TP_PROTO(unsigned long addr, unsigned long len),
TP_ARGS(addr, len));