Skip to content

Commit

Permalink
prov/smr: Avoid smr freestack exhaustion
Browse files Browse the repository at this point in the history
Signed-off-by: Luke Robison <lrbison@amazon.com>
  • Loading branch information
lrbison committed Jun 1, 2023
1 parent 0bd76cc commit 8436880
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 1 deletion.
8 changes: 7 additions & 1 deletion prov/shm/src/smr.h
Original file line number Diff line number Diff line change
Expand Up @@ -417,9 +417,15 @@ static inline struct smr_inject_buf *
smr_get_txbuf(struct smr_region *smr)
{
struct smr_inject_buf * txbuf;
struct smr_freestack* fs;

pthread_spin_lock(&smr->lock);
txbuf = smr_freestack_pop(smr_inject_pool(smr));
fs = smr_inject_pool(smr);
if (smr_freestack_isempty(fs)) {
FI_WARN(&smr_prov, FI_LOG_EP_DATA, "Freestack is empty!");
txbuf = NULL;
} else
txbuf = smr_freestack_pop(fs);
pthread_spin_unlock(&smr->lock);

return txbuf;
Expand Down
6 changes: 6 additions & 0 deletions prov/shm/src/smr_ep.c
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,8 @@ static void smr_send_name(struct smr_ep *ep, int64_t id)
if (ret == -FI_ENOENT)
return;
tx_buf = smr_get_txbuf(peer_smr);
if (!tx_buf)
return;

ce->cmd.msg.hdr.op = SMR_OP_MAX + ofi_ctrl_connreq;
ce->cmd.msg.hdr.id = id;
Expand Down Expand Up @@ -700,6 +702,10 @@ static ssize_t smr_do_inject(struct smr_ep *ep, struct smr_region *peer_smr, int
struct smr_inject_buf *tx_buf;

tx_buf = smr_get_txbuf(peer_smr);
if (!tx_buf) {
FI_WARN(&smr_prov, FI_LOG_EP_DATA, "smr_do_inject failing");
return -FI_ENOMEM;
}

smr_generic_format(cmd, peer_id, op, tag, data, op_flags);
smr_format_inject(cmd, desc, iov, iov_count, peer_smr, tx_buf);
Expand Down
4 changes: 4 additions & 0 deletions prov/shm/src/smr_msg.c
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,10 @@ static ssize_t smr_generic_sendmsg(struct smr_ep *ep, const struct iovec *iov,
(struct ofi_mr **)desc, iov, iov_count, total_len,
context, &ce->cmd);
if (ret) {
if (ret != -FI_EAGAIN)
FI_WARN(&smr_prov, FI_LOG_EP_CTRL, "Error in smr_generic_sendmsg op: %ld\n", ret);
if (ret == -FI_ENOMEM)
ret = -FI_EAGAIN;
smr_cmd_queue_discard(ce, pos);
goto unlock_cq;
}
Expand Down

0 comments on commit 8436880

Please sign in to comment.