From 48490b95d636517c7a00e2f6c4301878e6cdd0c2 Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Fri, 19 Sep 2025 14:40:21 -0600 Subject: [PATCH] Fix bug in MCA_PML_OB1_ADD_ACK_TO_PENDING that causes memory overruns or failure The MCA_PML_OB1_ADD_ACK_TO_PENDING method creates a mca_pml_ob1_pckt_pending_t to hold an ack to be sent later. This method builds the pending packet then puts it on the mca_pml_ob1.pckt_pending list for later transmission. It does not, however, set the required hdr_size field on the struct. This leads to issues when the packet is later sent because it could contain any value. With some btls this will lead to memory corruption (if the size is not checked against btl_max_send_size) or just allocation failure because the size is too big. In other situations it could lead to a truncated packet being send (if the size previously in hdr_size is smaller than an ack). To fix the issue this commit gets rid of the macro entirely and replaces it with a new inline helper method that does the same thing. This helper uses the existing mca_pml_ob1_add_to_pending helper (which sets hdr_size) to reduce duplicated code. Tested and verified this fixes a critical issue triggered on our hardware. Signed-off-by: Nathan Hjelm --- ompi/mca/pml/ob1/pml_ob1_recvreq.h | 34 ++++++++++++++---------------- 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/ompi/mca/pml/ob1/pml_ob1_recvreq.h b/ompi/mca/pml/ob1/pml_ob1_recvreq.h index 402d3f4dcec..a266e3388bb 100644 --- a/ompi/mca/pml/ob1/pml_ob1_recvreq.h +++ b/ompi/mca/pml/ob1/pml_ob1_recvreq.h @@ -407,23 +407,21 @@ static inline void mca_pml_ob1_recv_request_schedule( (void)mca_pml_ob1_recv_request_schedule_exclusive(req, start_bml_btl); } -#define MCA_PML_OB1_ADD_ACK_TO_PENDING(P, S, D, O, Sz) \ - do { \ - mca_pml_ob1_pckt_pending_t *_pckt; \ - \ - MCA_PML_OB1_PCKT_PENDING_ALLOC(_pckt); \ - _pckt->hdr.hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_ACK; \ - _pckt->hdr.hdr_ack.hdr_src_req.lval = (S); \ - _pckt->hdr.hdr_ack.hdr_dst_req.pval = (D); \ - _pckt->hdr.hdr_ack.hdr_send_offset = (O); \ - _pckt->hdr.hdr_ack.hdr_send_size = (Sz); \ - _pckt->proc = (P); \ - _pckt->bml_btl = NULL; \ - OPAL_THREAD_LOCK(&mca_pml_ob1.lock); \ - opal_list_append(&mca_pml_ob1.pckt_pending, \ - (opal_list_item_t*)_pckt); \ - OPAL_THREAD_UNLOCK(&mca_pml_ob1.lock); \ - } while(0) +static inline void mca_pml_ob1_add_ack_to_pending(ompi_proc_t *proc, uintptr_t src_req, void *dst_req, + uint64_t send_offset, uint64_t send_size) { + mca_pml_ob1_hdr_t hdr = { + .hdr_ack = { + .hdr_common = { .hdr_type = MCA_PML_OB1_HDR_TYPE_ACK }, + .hdr_src_req = { .lval = src_req }, + .hdr_dst_req = { .pval = dst_req }, + .hdr_send_offset = send_offset, + .hdr_send_size = send_size, + }, + }; + + mca_pml_ob1_add_to_pending(proc, /*bml_btl=*/NULL, /*order=*/0, + &hdr, sizeof(hdr.hdr_ack)); +} int mca_pml_ob1_recv_request_ack_send_btl(ompi_proc_t* proc, mca_bml_base_btl_t* bml_btl, uint64_t hdr_src_req, void *hdr_dst_req, @@ -455,7 +453,7 @@ mca_pml_ob1_recv_request_ack_send(mca_btl_base_module_t* btl, } } - MCA_PML_OB1_ADD_ACK_TO_PENDING(proc, hdr_src_req, hdr_dst_req, + mca_pml_ob1_add_ack_to_pending(proc, hdr_src_req, hdr_dst_req, hdr_send_offset, size); return OMPI_ERR_OUT_OF_RESOURCE;