diff --git a/ompi/errhandler/errhandler_invoke.c b/ompi/errhandler/errhandler_invoke.c index d9d3ede4677..7cb4e343d50 100644 --- a/ompi/errhandler/errhandler_invoke.c +++ b/ompi/errhandler/errhandler_invoke.c @@ -18,6 +18,7 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2023 Triad National Security, LLC. All rights * reserved. + * Copyright (c) 2025 NVIDIA Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -180,6 +181,7 @@ int ompi_errhandler_request_invoke(int count, that had an error. */ for (; i < count; ++i) { if (MPI_REQUEST_NULL != requests[i] && + !requests[i]->req_persistent && MPI_SUCCESS != requests[i]->req_status.MPI_ERROR) { #if OPAL_ENABLE_FT_MPI /* Special case for MPI_ANY_SOURCE when marked as diff --git a/ompi/request/req_test.c b/ompi/request/req_test.c index b28ade8a67a..8eaa37e0d29 100644 --- a/ompi/request/req_test.c +++ b/ompi/request/req_test.c @@ -13,6 +13,7 @@ * Copyright (c) 2006-2008 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2010-2012 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012 Oak Ridge National Labs. All rights reserved. + * Copyright (c) 2025 NVIDIA Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -136,7 +137,7 @@ int ompi_request_default_test_any( if( request->req_persistent ) { request->req_state = OMPI_REQUEST_INACTIVE; - return OMPI_SUCCESS; + return request->req_status.MPI_ERROR; } /* If there is an error on the request, don't free it */ if (MPI_SUCCESS != request->req_status.MPI_ERROR) { @@ -248,27 +249,26 @@ int ompi_request_default_test_all( ompi_grequest_invoke_query(request, &request->req_status); } OMPI_COPY_STATUS(&statuses[i], request->req_status, true); - if( request->req_persistent ) { - request->req_state = OMPI_REQUEST_INACTIVE; - continue; - } - /* MPI-2:4.5.1 says that we can return MPI_ERR_IN_STATUS - even if MPI_STATUSES_IGNORE was used. Woot! */ - /* Only free the request if there was no error on it */ if (MPI_SUCCESS == request->req_status.MPI_ERROR) { - int tmp = ompi_request_free(rptr); - if (tmp != OMPI_SUCCESS) { - return tmp; - } - } else { rc = MPI_ERR_IN_STATUS; #if OPAL_ENABLE_FT_MPI if (MPI_ERR_PROC_FAILED == request->req_status.MPI_ERROR - || MPI_ERR_REVOKED == request->req_status.MPI_ERROR) { + || MPI_ERR_REVOKED == request->req_status.MPI_ERROR) { rc = request->req_status.MPI_ERROR; } #endif /* OPAL_ENABLE_FT_MPI */ } + if (request->req_persistent) { + request->req_state = OMPI_REQUEST_INACTIVE; + } else if (MPI_SUCCESS == request->req_status.MPI_ERROR) { + /* MPI-2:4.5.1 says that we can return MPI_ERR_IN_STATUS + even if MPI_STATUSES_IGNORE was used. Woot! */ + /* Only free the request if there was no error on it */ + int tmp = ompi_request_free(rptr); + if (tmp != OMPI_SUCCESS) { + return tmp; + } + } } } else { /* free request if required */ @@ -283,25 +283,24 @@ int ompi_request_default_test_all( if (OMPI_REQUEST_GEN == request->req_type) { ompi_grequest_invoke_query(request, &request->req_status); } - if( request->req_persistent ) { - request->req_state = OMPI_REQUEST_INACTIVE; - continue; - } - /* Only free the request if there was no error */ - if (MPI_SUCCESS == request->req_status.MPI_ERROR) { - int tmp = ompi_request_free(rptr); - if (tmp != OMPI_SUCCESS) { - return tmp; - } - } else { + if (MPI_SUCCESS != request->req_status.MPI_ERROR) { rc = MPI_ERR_IN_STATUS; #if OPAL_ENABLE_FT_MPI if (MPI_ERR_PROC_FAILED == request->req_status.MPI_ERROR - || MPI_ERR_REVOKED == request->req_status.MPI_ERROR) { + || MPI_ERR_REVOKED == request->req_status.MPI_ERROR) { rc = request->req_status.MPI_ERROR; } #endif /* OPAL_ENABLE_FT_MPI */ } + if (request->req_persistent) { + request->req_state = OMPI_REQUEST_INACTIVE; + } else if (MPI_SUCCESS == request->req_status.MPI_ERROR) { + /* Only free the request if there was no error */ + int tmp = ompi_request_free(rptr); + if (tmp != OMPI_SUCCESS) { + return tmp; + } + } } } @@ -398,7 +397,7 @@ int ompi_request_default_test_some( #endif /* OPAL_ENABLE_FT_MPI */ } - if( request->req_persistent ) { + if (request->req_persistent) { request->req_state = OMPI_REQUEST_INACTIVE; } else { /* Only free the request if there was no error */ diff --git a/ompi/request/req_wait.c b/ompi/request/req_wait.c index d66eccf9f79..f5b3d43deac 100644 --- a/ompi/request/req_wait.c +++ b/ompi/request/req_wait.c @@ -18,6 +18,7 @@ * Copyright (c) 2016 Mellanox Technologies. All rights reserved. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2025 NVIDIA Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -228,7 +229,7 @@ int ompi_request_default_wait_all( size_t count, size_t i, completed = 0, failed = 0; ompi_request_t **rptr; ompi_request_t *request; - int mpi_error = OMPI_SUCCESS; + int mpi_error = OMPI_SUCCESS, rc; ompi_wait_sync_t sync; if (OPAL_UNLIKELY(0 == count)) { @@ -356,13 +357,12 @@ int ompi_request_default_wait_all( size_t count, if( request->req_persistent ) { request->req_state = OMPI_REQUEST_INACTIVE; - continue; - } - /* Only free the request if there is no error on it */ - if (MPI_SUCCESS == request->req_status.MPI_ERROR) { + } else if (MPI_SUCCESS == request->req_status.MPI_ERROR) { + /* Only free the request if there is no error on it */ + /* If there's an error while freeing the request, - assume that the request is still there. - Otherwise, Bad Things will happen later! */ + assume that the request is still there. + Otherwise, Bad Things will happen later! */ int tmp = ompi_request_free(rptr); if (OMPI_SUCCESS == mpi_error && OMPI_SUCCESS != tmp) { mpi_error = tmp; @@ -373,7 +373,6 @@ int ompi_request_default_wait_all( size_t count, } } } else { - int rc; /* free request if required */ for( i = 0; i < count; i++, rptr++ ) { void *_tmp_ptr = &sync; @@ -382,8 +381,9 @@ int ompi_request_default_wait_all( size_t count, if( request->req_state == OMPI_REQUEST_INACTIVE ) { rc = ompi_status_empty.MPI_ERROR; - goto absorb_error_and_continue; + continue; } + rc = OMPI_SUCCESS; /* * Assert only if no requests were failed. * Since some may still be pending. @@ -406,7 +406,8 @@ int ompi_request_default_wait_all( size_t count, rc = MPI_ERR_PROC_FAILED_PENDING; } #endif /* OPAL_ENABLE_FT_MPI */ - goto absorb_error_and_continue; + mpi_error = MPI_ERR_IN_STATUS; + continue; } } assert( REQUEST_COMPLETE(request) ); @@ -417,18 +418,12 @@ int ompi_request_default_wait_all( size_t count, rc = ompi_grequest_invoke_query(request, &request->req_status); } - rc = request->req_status.MPI_ERROR; - - if( request->req_persistent ) { + if (request->req_persistent) { request->req_state = OMPI_REQUEST_INACTIVE; - } else if (MPI_SUCCESS == rc) { + } else if (MPI_SUCCESS == request->req_status.MPI_ERROR) { /* Only free the request if there is no error on it */ - int tmp = ompi_request_free(rptr); - if (OMPI_SUCCESS == mpi_error && OMPI_SUCCESS != tmp) { - mpi_error = tmp; - } + rc = ompi_request_free(rptr); } - absorb_error_and_continue: #if OPAL_ENABLE_FT_MPI if( (MPI_ERR_PROC_FAILED == rc) || (MPI_ERR_REVOKED == rc) ) { mpi_error = rc; @@ -441,7 +436,7 @@ int ompi_request_default_wait_all( size_t count, * passed to that function." * So we should do so here as well. */ - if( OMPI_SUCCESS == mpi_error && rc != OMPI_SUCCESS) { + if (OMPI_SUCCESS == mpi_error && OMPI_SUCCESS != rc) { mpi_error = MPI_ERR_IN_STATUS; } }